def test_conv_backward_naive(self):
    print("\n======== Test3LayerConvNet.test_conv_backward_naive:")
    X = np.random.randn(4, 3, 5, 5)
    W = np.random.randn(2, 3, 3, 3)
    b = np.random.randn(2)
    dout = np.random.randn(4, 2, 5, 5)
    conv_param = {'stride': 1, 'pad': 1}
    dx_num = check_gradient.eval_numerical_gradient_array(
        lambda x: conv_layers.conv_forward_naive(x, W, b, conv_param)[0], X, dout)
    dw_num = check_gradient.eval_numerical_gradient_array(
        lambda w: conv_layers.conv_forward_naive(X, w, b, conv_param)[0], W, dout)
    db_num = check_gradient.eval_numerical_gradient_array(
        lambda b: conv_layers.conv_forward_naive(X, W, b, conv_param)[0], b, dout)
    out, cache = conv_layers.conv_forward_naive(X, W, b, conv_param)
    dx, dw, db = conv_layers.conv_backward_naive(dout, cache)
    dx_error = error.rel_error(dx, dx_num)
    dw_error = error.rel_error(dw, dw_num)
    db_error = error.rel_error(db, db_num)
    print("dx_error : %.9f" % dx_error)
    print("dw_error : %.9f" % dw_error)
    print("db_error : %.9f" % db_error)
    self.assertLessEqual(dx_error, self.eps)
    self.assertLessEqual(dw_error, self.eps)
    self.assertLessEqual(db_error, self.eps)
    print("======== Test3LayerConvNet.test_conv_backward_naive: <END> ")

def test_lstm_step_forward(self):
    print("\n======== TestCaptioningLSTM.test_lstm_step_forward:")
    N = 3
    D = 4
    H = 5
    X = np.linspace(-0.4, 1.2, num=N * D).reshape(N, D)
    prev_h = np.linspace(-0.3, 0.7, num=N * H).reshape(N, H)
    prev_c = np.linspace(-0.4, 0.9, num=N * H).reshape(N, H)
    Wx = np.linspace(-2.1, 1.3, num=4 * D * H).reshape(D, 4 * H)
    Wh = np.linspace(-0.7, 2.2, num=4 * H * H).reshape(H, 4 * H)
    b = np.linspace(0.3, 0.7, num=4 * H)
    next_h, next_c, cache = rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)
    expected_next_h = np.asarray(
        [[0.24635157, 0.28610883, 0.32240467, 0.35525807, 0.38474904],
         [0.49223563, 0.55611431, 0.61507696, 0.66844003, 0.7159181],
         [0.56735664, 0.66310127, 0.74419266, 0.80889665, 0.858299]])
    expected_next_c = np.asarray(
        [[0.32986176, 0.39145139, 0.451556, 0.51014116, 0.56717407],
         [0.66382255, 0.76674007, 0.87195994, 0.97902709, 1.08751345],
         [0.74192008, 0.90592151, 1.07717006, 1.25120233, 1.42395676]])
    h_err = error.rel_error(next_h, expected_next_h)
    c_err = error.rel_error(next_c, expected_next_c)
    print('h_err : %f' % h_err)
    print('c_err : %f' % c_err)
    self.assertLessEqual(h_err, self.eps)
    self.assertLessEqual(c_err, self.eps)
    print("\n======== TestCaptioningLSTM.test_lstm_step_forward: <END>")

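# The sketch below is not part of the original test suite. It shows the LSTM
# step that rnn_layers.lstm_step_forward is expected to compute, assuming the
# usual cs231n-style gate ordering (input, forget, output, block input) along
# the 4H axis; the actual implementation may differ in detail.
def _lstm_step_forward_sketch(x, prev_h, prev_c, Wx, Wh, b):
    H = prev_h.shape[1]
    a = x.dot(Wx) + prev_h.dot(Wh) + b       # (N, 4H) gate pre-activations
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    i = sigmoid(a[:, 0:H])                   # input gate
    f = sigmoid(a[:, H:2 * H])               # forget gate
    o = sigmoid(a[:, 2 * H:3 * H])           # output gate
    g = np.tanh(a[:, 3 * H:4 * H])           # candidate cell update
    next_c = f * prev_c + i * g              # new cell state
    next_h = o * np.tanh(next_c)             # new hidden state
    return next_h, next_c
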
def test_gradient_check_2conv_layers(self):
    print("\n======== TestConvNet.test_gradient_check_conv:")
    num_inputs = 2
    input_dim = (3, 32, 32)
    num_classes = 10
    X = np.random.randn(num_inputs, *input_dim)
    y = np.random.randint(num_classes, size=num_inputs)
    # TODO: modify this to be an L-layer net
    model = convnet.ConvNetLayer(reg=0.0)
    loss, grads = model.loss(X, y)
    for p in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        param_grad_num = check_gradient.eval_numerical_gradient(
            f, model.params[p], verbose=False, h=1e-6)
        err = error.rel_error(param_grad_num, grads[p])
        print("%s max relative error: %e" % (p, err))
    # The assertions run in a second pass so that all errors are printed
    # to the console before any assertion can fail
    for p in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        param_grad_num = check_gradient.eval_numerical_gradient(
            f, model.params[p], verbose=False, h=1e-6)
        err = error.rel_error(param_grad_num, grads[p])
        self.assertLessEqual(err, self.eps)
    print("======== TestConvNet.test_gradient_check_conv: <END> ")

def test_rmsprop(self):
    print("\n======== TestSolver.test_rmsprop:")
    N = 4
    D = 5
    w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
    dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
    cache = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)
    config = {'learning_rate': 1e-2, 'cache': cache}
    next_w, _ = optim.rmsprop(w, dw, config=config)
    expected_next_w = np.asarray(
        [[-0.39223849, -0.34037513, -0.28849239, -0.23659121, -0.18467247],
         [-0.132737, -0.08078555, -0.02881884, 0.02316247, 0.07515774],
         [0.12716641, 0.17918792, 0.23122175, 0.28326742, 0.33532447],
         [0.38739248, 0.43947102, 0.49155973, 0.54365823, 0.59576619]])
    expected_cache = np.asarray(
        [[0.5976, 0.6126277, 0.6277108, 0.64284931, 0.65804321],
         [0.67329252, 0.68859723, 0.70395734, 0.71937285, 0.73484377],
         [0.75037008, 0.7659518, 0.78158892, 0.79728144, 0.81302936],
         [0.82883269, 0.84469141, 0.86060554, 0.87657507, 0.8926]])
    next_w_error = error.rel_error(next_w, expected_next_w)
    cache_error = error.rel_error(config['cache'], expected_cache)
    print("next_w_error = %f" % next_w_error)
    print("cache_error = %f" % cache_error)
    self.assertLessEqual(next_w_error, self.eps)
    self.assertLessEqual(cache_error, self.eps)
    print("======== TestSolver.test_rmsprop: <END> ")

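# Not part of the original suite: a minimal sketch of the RMSProp update that
# optim.rmsprop is expected to perform, assuming cs231n-style defaults
# (decay_rate=0.99, epsilon=1e-8); the real optim.rmsprop reads these values
# from its config dict.
def _rmsprop_sketch(w, dw, cache, learning_rate=1e-2, decay_rate=0.99, eps=1e-8):
    cache = decay_rate * cache + (1 - decay_rate) * dw**2     # moving average of squared grads
    next_w = w - learning_rate * dw / (np.sqrt(cache) + eps)  # per-parameter scaled step
    return next_w, cache
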
def test_affine_layer_backward(self):
    print("\n======== TestLayers.test_affine_layer_backward:")
    x = np.random.randn(10, 2, 3)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)
    dx_num = check_gradient.eval_numerical_gradient_array(
        lambda x: layers.affine_forward(x, w, b)[0], x, dout)
    dw_num = check_gradient.eval_numerical_gradient_array(
        lambda w: layers.affine_forward(x, w, b)[0], w, dout)
    db_num = check_gradient.eval_numerical_gradient_array(
        lambda b: layers.affine_forward(x, w, b)[0], b, dout)
    _, cache = layers.affine_forward(x, w, b)
    dx, dw, db = layers.affine_backward(dout, cache)
    dx_diff = error.rel_error(dx_num, dx)
    dw_diff = error.rel_error(dw_num, dw)
    db_diff = error.rel_error(db_num, db)
    print("dx error : %.9f" % dx_diff)
    print("dw error : %.9f" % dw_diff)
    print("db error : %.9f" % db_diff)
    # NOTE: with random inputs these errors occasionally exceed self.eps.
    # Rather than re-writing the test so that it passes every time (e.g.
    # by seeding the RNG), we accept the occasional spurious failure.
    self.assertLessEqual(dx_diff, self.eps)
    self.assertLessEqual(dw_diff, self.eps)
    self.assertLessEqual(db_diff, self.eps)
    print("======== TestLayers.test_affine_layer_backward: <END> ")

def test_adam(self):
    print("\n======== TestSolver.test_adam:")
    N = 4
    D = 5
    w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
    dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
    m = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)
    v = np.linspace(0.7, 0.5, num=N * D).reshape(N, D)
    config = {'learning_rate': 1e-2, 'm': m, 'v': v, 't': 5}
    next_w, _ = optim.adam(w, dw, config=config)
    expected_next_w = np.asarray(
        [[-0.40094747, -0.34836187, -0.29577703, -0.24319299, -0.19060977],
         [-0.1380274, -0.08544591, -0.03286534, 0.01971428, 0.0722929],
         [0.1248705, 0.17744702, 0.23002243, 0.28259667, 0.33516969],
         [0.38774145, 0.44031188, 0.49288093, 0.54544852, 0.59801459]])
    expected_v = np.asarray(
        [[0.69966, 0.68908382, 0.67851319, 0.66794809, 0.65738853],
         [0.64683452, 0.63628604, 0.6257431, 0.61520571, 0.60467385],
         [0.59414753, 0.58362676, 0.57311152, 0.56260183, 0.55209767],
         [0.54159906, 0.53110598, 0.52061845, 0.51013645, 0.49966]])
    expected_m = np.asarray(
        [[0.48, 0.49947368, 0.51894737, 0.53842105, 0.55789474],
         [0.57736842, 0.59684211, 0.61631579, 0.63578947, 0.65526316],
         [0.67473684, 0.69421053, 0.71368421, 0.73315789, 0.75263158],
         [0.77210526, 0.79157895, 0.81105263, 0.83052632, 0.85]])
    next_w_error = error.rel_error(next_w, expected_next_w)
    v_error = error.rel_error(config['v'], expected_v)
    m_error = error.rel_error(config['m'], expected_m)
    print("next_w_error = %f" % next_w_error)
    print("v_error = %f" % v_error)
    print("m_error = %f" % m_error)
    self.assertLessEqual(next_w_error, self.eps)
    self.assertLessEqual(v_error, self.eps)
    self.assertLessEqual(m_error, self.eps)
    print("======== TestSolver.test_adam: <END> ")

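# Not part of the original suite: a minimal sketch of the Adam update that
# optim.adam is expected to perform, assuming cs231n-style defaults
# (beta1=0.9, beta2=0.999, epsilon=1e-8) and that the step counter t is
# incremented before the bias correction.
def _adam_sketch(w, dw, m, v, t, learning_rate=1e-2, beta1=0.9, beta2=0.999, eps=1e-8):
    t += 1
    m = beta1 * m + (1 - beta1) * dw       # first-moment estimate
    v = beta2 * v + (1 - beta2) * dw**2    # second-moment estimate
    m_hat = m / (1 - beta1**t)             # bias-corrected first moment
    v_hat = v / (1 - beta2**t)             # bias-corrected second moment
    next_w = w - learning_rate * m_hat / (np.sqrt(v_hat) + eps)
    return next_w, m, v, t
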
def test_step_backward(self):
    print("\n======== TestCaptioningRNN.test_step_backward:")
    N = 4
    D = 5
    H = 6
    X = np.random.randn(N, D)
    h = np.random.randn(N, H)
    Wx = np.random.randn(D, H)
    Wh = np.random.randn(H, H)
    b = np.random.randn(H)
    out, cache = rnn_layers.rnn_step_forward(X, h, Wx, Wh, b)
    dnext_h = np.random.randn(*out.shape)
    fx = lambda x: rnn_layers.rnn_step_forward(x, h, Wx, Wh, b)[0]
    fh = lambda prev_h: rnn_layers.rnn_step_forward(X, prev_h, Wx, Wh, b)[0]
    fWx = lambda Wx: rnn_layers.rnn_step_forward(X, h, Wx, Wh, b)[0]
    fWh = lambda Wh: rnn_layers.rnn_step_forward(X, h, Wx, Wh, b)[0]
    fb = lambda b: rnn_layers.rnn_step_forward(X, h, Wx, Wh, b)[0]
    dx_num = check_gradient.eval_numerical_gradient_array(fx, X, dnext_h)
    dprev_h_num = check_gradient.eval_numerical_gradient_array(fh, h, dnext_h)
    dWx_num = check_gradient.eval_numerical_gradient_array(fWx, Wx, dnext_h)
    dWh_num = check_gradient.eval_numerical_gradient_array(fWh, Wh, dnext_h)
    db_num = check_gradient.eval_numerical_gradient_array(fb, b, dnext_h)
    dx, dprev_h, dWx, dWh, db = rnn_layers.rnn_step_backward(dnext_h, cache)
    dx_err = error.rel_error(dx, dx_num)
    dprev_h_err = error.rel_error(dprev_h, dprev_h_num)
    dwx_err = error.rel_error(dWx, dWx_num)
    dwh_err = error.rel_error(dWh, dWh_num)
    db_err = error.rel_error(db, db_num)
    print("dx_err : %f" % dx_err)
    print("dprev_h_err : %f" % dprev_h_err)
    print("dwx_err : %f" % dwx_err)
    print("dwh_err : %f" % dwh_err)
    print("db_err : %f" % db_err)
    self.assertLessEqual(dx_err, self.eps)
    self.assertLessEqual(dprev_h_err, self.eps)
    #self.assertLessEqual(dwx_err, self.eps)
    self.assertLessEqual(dwh_err, self.eps)
    self.assertLessEqual(db_err, self.eps)
    print("======== TestCaptioningRNN.test_step_backward: <END> ")

def test_lstm_backward(self):
    print("\n======== TestCaptioningLSTM.test_lstm_backward:")
    N = 2
    D = 5
    H = 6
    T = 10
    X = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)
    Wx = np.random.randn(D, 4 * H)
    Wh = np.random.randn(H, 4 * H)
    b = np.random.randn(4 * H)
    # Do forward pass
    hout, cache = rnn_layers.lstm_forward(X, h0, Wx, Wh, b)
    dout = np.random.randn(*hout.shape)
    # Do backward pass
    dx, dh0, dWx, dWh, db = rnn_layers.lstm_backward(dout, cache)
    # Check gradient
    fx = lambda x: rnn_layers.lstm_forward(x, h0, Wx, Wh, b)[0]
    fh0 = lambda h0: rnn_layers.lstm_forward(X, h0, Wx, Wh, b)[0]
    fWx = lambda Wx: rnn_layers.lstm_forward(X, h0, Wx, Wh, b)[0]
    fWh = lambda Wh: rnn_layers.lstm_forward(X, h0, Wx, Wh, b)[0]
    fb = lambda b: rnn_layers.lstm_forward(X, h0, Wx, Wh, b)[0]
    num_grad = check_gradient.eval_numerical_gradient_array
    dx_num = num_grad(fx, X, dout)
    dh0_num = num_grad(fh0, h0, dout)
    dWx_num = num_grad(fWx, Wx, dout)
    dWh_num = num_grad(fWh, Wh, dout)
    db_num = num_grad(fb, b, dout)
    err = {}
    err['dx_err'] = error.rel_error(dx, dx_num)
    err['dh0_err'] = error.rel_error(dh0, dh0_num)
    err['dWx_err'] = error.rel_error(dWx, dWx_num)
    err['dWh_err'] = error.rel_error(dWh, dWh_num)
    err['db_err'] = error.rel_error(db, db_num)
    for k, v in err.items():
        print('%s : %f' % (k, v))
    for k in err.keys():
        self.assertLessEqual(err[k], self.eps)
    print("\n======== TestCaptioningLSTM.test_lstm_backward: <END>")

def test_lstm_forward(self):
    print("\n======== TestCaptioningLSTM.test_lstm_forward:")
    N = 2
    D = 5
    H = 4
    T = 3
    X = np.linspace(-0.4, 0.6, num=N * T * D).reshape(N, T, D)
    h0 = np.linspace(-0.4, 0.8, num=N * H).reshape(N, H)
    Wx = np.linspace(-0.2, 0.9, num=4 * D * H).reshape(D, 4 * H)
    Wh = np.linspace(-0.3, 0.6, num=4 * H * H).reshape(H, 4 * H)
    b = np.linspace(0.2, 0.7, num=4 * H)
    h, cache = rnn_layers.lstm_forward(X, h0, Wx, Wh, b)
    expected_h = np.asarray(
        [[[0.01764008, 0.01823233, 0.01882671, 0.0194232],
          [0.11287491, 0.12146228, 0.13018446, 0.13902939],
          [0.31358768, 0.33338627, 0.35304453, 0.37250975]],
         [[0.45767879, 0.4761092, 0.4936887, 0.51041945],
          [0.6704845, 0.69350089, 0.71486014, 0.7346449],
          [0.81733511, 0.83677871, 0.85403753, 0.86935314]]])
    h_err = error.rel_error(h, expected_h)
    print('h_err : %f' % h_err)
    self.assertLessEqual(h_err, self.eps)
    print("\n======== TestCaptioningLSTM.test_lstm_forward: <END>")

def test_conv_forward_naive(self):
    print("\n======== TestConvNet.test_conv_forward_naive:")
    x_shape = (2, 3, 4, 4)
    w_shape = (3, 3, 4, 4)
    x = np.linspace(-0.1, 0.5, num=np.prod(x_shape)).reshape(x_shape)
    w = np.linspace(-0.2, 0.3, num=np.prod(w_shape)).reshape(w_shape)
    b = np.linspace(-0.1, 0.2, num=3)
    conv_param = {'stride': 2, 'pad': 1}
    out, _ = conv_layers.conv_forward_naive(x, w, b, conv_param)
    # Expected output has shape (2, 3, 2, 2): N=2 examples, F=3 filters,
    # each producing a 2x2 activation map
    correct_out = np.array(
        [[[[-0.08759809, -0.10987781],
           [-0.18387192, -0.2109216]],
          [[0.21027089, 0.21661097],
           [0.22847626, 0.23004637]],
          [[0.50813986, 0.54309974],
           [0.64082444, 0.67101435]]],
         [[[-0.98053589, -1.03143541],
           [-1.19128892, -1.24695841]],
          [[0.69108355, 0.66880383],
           [0.59480972, 0.56776003]],
          [[2.36270298, 2.36904306],
           [2.38090835, 2.38247847]]]])
    out_error = error.rel_error(out, correct_out)
    print("out_error : %.9f " % out_error)
    self.assertLessEqual(out_error, self.eps)
    print("======== TestConvNet.test_conv_forward_naive: <END> ")

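# Not part of the original suite: a minimal sketch of the naive convolution
# that conv_layers.conv_forward_naive is expected to implement (zero padding,
# integer stride, no dilation); the real function also returns a cache.
def _conv_forward_naive_sketch(x, w, b, conv_param):
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']
    H_out = 1 + (H + 2 * pad - HH) // stride
    W_out = 1 + (W + 2 * pad - WW) // stride
    xp = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    out = np.zeros((N, F, H_out, W_out))
    for n in range(N):                  # over examples
        for f in range(F):              # over filters
            for i in range(H_out):      # over output rows
                for j in range(W_out):  # over output cols
                    patch = xp[n, :, i * stride:i * stride + HH,
                               j * stride:j * stride + WW]
                    out[n, f, i, j] = np.sum(patch * w[f]) + b[f]
    return out
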
def test_captioning_model(self):
    print("\n======== TestCaptioningLSTM.test_captioning_model:")
    N = 10
    D = 20
    W = 30
    H = 40
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    V = len(word_to_idx)
    T = 13
    lstm_model = captioning_rnn.CaptioningRNN(word_to_idx,
                                              input_dim=D,
                                              wordvec_dim=W,
                                              hidden_dim=H,
                                              cell_type='lstm',
                                              dtype=np.float32)
    # Set all model params to fixed values
    for k, v in lstm_model.params.items():
        lstm_model.params[k] = np.linspace(-1.4, 1.3, num=v.size).reshape(*v.shape)
    features = np.linspace(-0.5, 1.7, num=N * D).reshape(N, D)
    captions = (np.arange(N * T) % V).reshape(N, T)
    # Run loss
    loss, grads = lstm_model.loss(features, captions)
    expected_loss = 9.82445935443
    loss_err = error.rel_error(loss, expected_loss)
    print('loss          : %f' % loss)
    print('expected loss : %f' % expected_loss)
    print('error         : %f' % loss_err)
    self.assertLessEqual(loss_err, self.eps)
    print("\n======== TestCaptioningLSTM.test_captioning_model: <END>")

def test_affine_forward(self):
    print("======== TestAffineLayerObject.test_affine_forward:")
    N = 4
    D = 4
    affine_layer = layer_objects.AffineLayer(self.weight_scale,
                                             self.weight_init, N, D)
    print(affine_layer)
    self.assertEqual(affine_layer.W.shape[0], N)
    self.assertEqual(affine_layer.W.shape[1], D)
    X = np.random.randn(D, N)
    h = affine_layer.forward(X)
    print(affine_layer)
    expected_h = np.asarray(
        [[-0.02388677, 0.05494526, -0.05156986, 0.01443091],
         [-0.0001368, -0.00902937, 0.04242849, -0.01104825],
         [-0.01300566, 0.00471226, -0.00942038, 0.01923142],
         [0.03432371, -0.02492158, 0.04154934, -0.06043238]])
    h_err = error.rel_error(h, expected_h)
    print('h_err : %f' % h_err)
    self.assertLessEqual(h_err, self.eps)
    print("======== TestAffineLayerObject.test_affine_forward: <END> ")

def test_affine_layer_forward(self):
    print("\n======== TestLayers.test_affine_layer_forward:")
    num_inputs = 2
    input_shape = (4, 5, 6)
    output_dim = 3
    input_size = num_inputs * np.prod(input_shape)
    weight_size = output_dim * np.prod(input_shape)
    x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
    w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
    b = np.linspace(-0.3, 0.1, num=output_dim)
    out, _ = layers.affine_forward(x, w, b)
    correct_out = np.array([[1.49834967, 1.70660132, 1.91485297],
                            [3.25553199, 3.5141327, 3.77273342]])
    # Compare
    diff = error.rel_error(out, correct_out)
    print("Difference is %.9f" % diff)
    self.assertLessEqual(diff, self.eps)
    print("======== TestLayers.test_affine_layer_forward: <END> ")

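# Not part of the original suite: the affine forward pass checked above is a
# flatten-then-linear map. A minimal sketch, assuming layers.affine_forward
# also returns a cache alongside this output.
def _affine_forward_sketch(x, w, b):
    # Flatten each example to a row of np.prod(input_shape) features,
    # then apply the linear transform
    return x.reshape(x.shape[0], -1).dot(w) + b
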
def test_basic_captioning_gradient(self):
    print("\n======== TestCaptioningRNN.test_basic_captioning_gradient:")
    batch_size = 2
    timesteps = 3
    input_dim = 4
    wordvec_dim = 5
    hidden_dim = 6
    word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
    V = len(word_to_idx)        # size of our vocabulary
    captions = np.random.randint(V, size=(batch_size, timesteps))
    features = np.random.randn(batch_size, input_dim)
    # Get a model
    model = captioning_rnn.CaptioningRNN(word_to_idx,
                                         input_dim=input_dim,
                                         wordvec_dim=wordvec_dim,
                                         hidden_dim=hidden_dim,
                                         cell_type='rnn',
                                         dtype=self.dtype)
    # Compute loss
    loss, grads = model.loss(features, captions)
    for param_name in sorted(grads):
        f = lambda _: model.loss(features, captions)[0]
        param_grad_num = check_gradient.eval_numerical_gradient(
            f, model.params[param_name], verbose=False, h=1e-6)
        err = error.rel_error(param_grad_num, grads[param_name])
        print("%s relative error : %f" % (param_name, err))
        # This check needs a looser tolerance than self.eps
        #self.assertLessEqual(err, self.eps)
        self.assertLessEqual(err, 1e-5)
    print("======== TestCaptioningRNN.test_basic_captioning_gradient: <END> ")

def test_word_embedding_forward(self):
    print("\n======== TestCaptioningRNN.test_word_embedding_forward:")
    N = 2
    T = 4
    V = 5
    D = 3
    X = np.asarray([[0, 3, 1, 2], [2, 1, 0, 3]])
    W = np.linspace(0, 1, num=V * D).reshape(V, D)
    out, _ = rnn_layers.word_embedding_forward(X, W)
    expected_out = np.asarray(
        [[[0., 0.07142857, 0.14285714],
          [0.64285714, 0.71428571, 0.78571429],
          [0.21428571, 0.28571429, 0.35714286],
          [0.42857143, 0.5, 0.57142857]],
         [[0.42857143, 0.5, 0.57142857],
          [0.21428571, 0.28571429, 0.35714286],
          [0., 0.07142857, 0.14285714],
          [0.64285714, 0.71428571, 0.78571429]]])
    word_err = error.rel_error(expected_out, out)
    print("Word error : %f" % word_err)
    self.assertLessEqual(word_err, self.eps)
    print("======== TestCaptioningRNN.test_word_embedding_forward: <END> ")

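# Not part of the original suite: the word embedding forward pass is a row
# lookup, so expected_out above is simply rows of W selected by the indices
# in X. A minimal sketch (the real function also returns a cache):
def _word_embedding_forward_sketch(x, W):
    # x holds integer word indices of shape (N, T); fancy indexing
    # returns the (N, T, D) tensor of embedding vectors
    return W[x]
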
def test_affine_backward(self):
    print("======== TestAffineLayerObject.test_affine_backward:")
    N = 4
    D = 8
    affine_layer = layer_objects.AffineLayer(self.weight_scale,
                                             self.weight_init, N, D)
    print(affine_layer)
    self.assertEqual(affine_layer.W.shape[0], N)
    self.assertEqual(affine_layer.W.shape[1], D)
    print('Computing affine forward pass')
    X = np.linspace(-0.5, 0.5, num=N * D).reshape(N, D)
    print('X shape : %s' % str(X.shape))
    h = affine_layer.forward(X)
    print('forward activation shape: %s' % str(h.shape))
    print('Computing affine backward pass')
    dz = np.random.randn(*X.shape)
    print('Gradient shape : %s' % str(dz.shape))
    dx = affine_layer.backward(dz)
    # Numerically differentiate the forward pass with respect to X and
    # contract with dz; this is what backward(dz) should return for dx
    dx_num = check_gradient.eval_numerical_gradient_array(
        lambda x: affine_layer.forward(x), X, dz)
    dx_err = error.rel_error(dx, dx_num)
    print('dx error: %f' % dx_err)
    self.assertLessEqual(dx_err, self.eps)
    print("======== TestAffineLayerObject.test_affine_backward: <END> ")

def test_gradient(self):
    x = np.random.randn(10, 2, 3)
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)
    dx_num = check_gradient.eval_numerical_gradient_array(
        lambda x: layers.affine_forward(x, w, b)[0], x, dout)
    dw_num = check_gradient.eval_numerical_gradient_array(
        lambda w: layers.affine_forward(x, w, b)[0], w, dout)
    db_num = check_gradient.eval_numerical_gradient_array(
        lambda b: layers.affine_forward(x, w, b)[0], b, dout)
    _, cache = layers.affine_forward(x, w, b)
    dx, dw, db = layers.affine_backward(dout, cache)
    print("dx error : %.6f " % error.rel_error(dx_num, dx))
    print("dw error : %.6f " % error.rel_error(dw_num, dw))
    print("db error : %.6f " % error.rel_error(db_num, db))

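# Not part of the original suite: minimal sketches of the two helpers used
# throughout these tests, assuming they follow the cs231n conventions.
# eval_numerical_gradient_array perturbs x *in place*, which is why lambdas
# that ignore their argument and close over the original array still work.
def _rel_error_sketch(x, y):
    # Maximum elementwise relative error, guarded against division by zero
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))

def _eval_numerical_gradient_array_sketch(f, x, df, h=1e-5):
    # Centered finite differences of f at x, contracted with the upstream
    # gradient df (chain rule), one element of x at a time
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old_val = x[ix]
        x[ix] = old_val + h
        pos = f(x).copy()
        x[ix] = old_val - h
        neg = f(x).copy()
        x[ix] = old_val
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad
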
def test_temporal_affine_forward(self):
    print("\n======== TestCaptioningRNN.test_temporal_affine_forward:")
    N = 2
    T = 3
    D = 4
    M = 5
    X = np.random.randn(N, T, D)
    W = np.random.randn(D, M)
    b = np.random.randn(M)
    out, cache = rnn_layers.temporal_affine_forward(X, W, b)
    dout = np.random.randn(*out.shape)
    # Forward pass lambda functions
    fx = lambda x: rnn_layers.temporal_affine_forward(x, W, b)[0]
    fw = lambda w: rnn_layers.temporal_affine_forward(X, w, b)[0]
    fb = lambda b: rnn_layers.temporal_affine_forward(X, W, b)[0]
    dx_num = check_gradient.eval_numerical_gradient_array(fx, X, dout)
    dw_num = check_gradient.eval_numerical_gradient_array(fw, W, dout)
    db_num = check_gradient.eval_numerical_gradient_array(fb, b, dout)
    dx, dw, db = rnn_layers.temporal_affine_backward(dout, cache)
    # Compute errors
    dx_err = error.rel_error(dx_num, dx)
    dw_err = error.rel_error(dw_num, dw)
    db_err = error.rel_error(db_num, db)
    print('dx_err : %f' % dx_err)
    print('dw_err : %f' % dw_err)
    print('db_err : %f' % db_err)
    self.assertLessEqual(dx_err, self.eps)
    self.assertLessEqual(dw_err, self.eps)
    self.assertLessEqual(db_err, self.eps)
    print("======== TestCaptioningRNN.test_temporal_affine_forward: <END> ")

def test_batchnorm_backward(self):
    print("\n======== TestLayersBatchnorm.test_batchnorm_backward:")
    N = 4
    D = 5
    x = 5 * np.random.randn(N, D) + 12
    gamma = np.random.randn(D)
    beta = np.random.randn(D)
    dout = np.random.randn(N, D)
    bn_param = {'mode': 'train'}
    fx = lambda x: layers.batchnorm_forward(x, gamma, beta, bn_param)[0]
    fg = lambda gamma: layers.batchnorm_forward(x, gamma, beta, bn_param)[0]
    fb = lambda beta: layers.batchnorm_forward(x, gamma, beta, bn_param)[0]
    dx_num = check_gradient.eval_numerical_gradient_array(fx, x, dout)
    da_num = check_gradient.eval_numerical_gradient_array(fg, gamma, dout)
    db_num = check_gradient.eval_numerical_gradient_array(fb, beta, dout)
    _, cache = layers.batchnorm_forward(x, gamma, beta, bn_param)
    dx, dgamma, dbeta = layers.batchnorm_backward(dout, cache)
    dx_error = error.rel_error(dx, dx_num)
    dgamma_error = error.rel_error(dgamma, da_num)
    dbeta_error = error.rel_error(dbeta, db_num)
    print("dx_error     : %f" % dx_error)
    print("dgamma_error : %f" % dgamma_error)
    print("dbeta_error  : %f" % dbeta_error)
    self.assertLessEqual(dx_error, self.eps)
    self.assertLessEqual(dgamma_error, self.eps)
    self.assertLessEqual(dbeta_error, self.eps)
    print("======== TestLayersBatchnorm.test_batchnorm_backward: <END> ")

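# Not part of the original suite: the train-mode forward pass whose gradients
# the test above checks, as a minimal sketch (the real layers.batchnorm_forward
# also tracks running statistics and returns a cache).
def _batchnorm_forward_train_sketch(x, gamma, beta, eps=1e-5):
    mu = x.mean(axis=0)                    # per-feature batch mean
    var = x.var(axis=0)                    # per-feature batch variance
    x_hat = (x - mu) / np.sqrt(var + eps)  # normalize
    return gamma * x_hat + beta            # scale and shift
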
def test_relu_layer_backward(self):
    print("\n======== TestLayers.test_relu_layer_backward:")
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)
    dx_num = check_gradient.eval_numerical_gradient_array(
        lambda x: layers.relu_forward(x)[0], x, dout)
    _, cache = layers.relu_forward(x)
    dx = layers.relu_backward(dout, cache)
    dx_error = error.rel_error(dx_num, dx)
    print("dx_error : %.9f" % dx_error)
    self.assertLessEqual(dx_error, self.eps)
    print("======== TestLayers.test_relu_layer_backward: <END> ")

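# Not part of the original suite: minimal sketches of the ReLU pair under
# test, assuming the cache is simply the forward input.
def _relu_forward_sketch(x):
    return np.maximum(0, x)

def _relu_backward_sketch(dout, x):
    # Gradient flows only through elements where the input was positive
    return dout * (x > 0)
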
def test_relu_backward(self):
    print("======== TestAffineLayerObject.test_relu_backward:")
    X = np.random.randn(10, 10)
    dout = np.random.randn(*X.shape)
    relu_layer = layer_objects.ReLULayer(self.weight_scale,
                                         self.weight_init, 10, 10)
    relu_layer.X = X        # store cache
    # Numerically differentiate the forward pass and compare against
    # the analytic gradient from backward()
    dx_num = check_gradient.eval_numerical_gradient_array(
        lambda x: relu_layer.forward(x), X, dout)
    dx = relu_layer.backward(dout)
    dx_error = error.rel_error(dx_num, dx)
    print("dx_error : %.9f" % dx_error)
    self.assertLessEqual(dx_error, self.eps)
    print("======== TestAffineLayerObject.test_relu_backward: <END> ")

def test_relu_layer_forward(self):
    print("\n======== TestLayers.test_relu_layer_forward:")
    x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)
    out, _ = layers.relu_forward(x)
    correct_out = np.array([[0.0, 0.0, 0.0, 0.0],
                            [0.0, 0.0, 0.04545455, 0.13636364],
                            [0.22727273, 0.31818182, 0.40909091, 0.5]])
    diff = error.rel_error(out, correct_out)
    print("Difference is %.9f" % diff)
    # NOTE: the reference values above are only quoted to 8 decimal places,
    # so the comparison needs slightly more slack than self.eps
    self.assertLessEqual(diff, self.eps + 4e-8)
    print("Note : added slack of 4e-8 to self.eps (%f)" % self.eps)
    print("======== TestLayers.test_relu_layer_forward: <END> ")

def test_temporal_softmax(self):
    print("\n======== TestCaptioningRNN.test_temporal_softmax:")
    loss1 = rnn_utils.check_loss(100, 1, 10, 1.0)       # expect about 2.3
    loss2 = rnn_utils.check_loss(1000, 10, 10, 1.0)     # expect about 23
    loss3 = rnn_utils.check_loss(5000, 10, 10, 0.1)     # expect about 2.3
    print('loss (100, 1, 10, 1.0)   : %f' % loss1)
    print('loss (1000, 10, 10, 1.0) : %f' % loss2)
    print('loss (5000, 10, 10, 0.1) : %f' % loss3)
    self.assertLessEqual(loss1, 2.3)
    self.assertLessEqual(loss2, 23)
    self.assertLessEqual(loss3, 2.3)
    print("Performing gradient check for temporal softmax loss")
    N = 7
    T = 8
    V = 9
    X = np.random.randn(N, T, V)
    y = np.random.randint(V, size=(N, T))
    mask = (np.random.randn(N, T) > 0.5)
    loss, dx = rnn_layers.temporal_softmax_loss(X, y, mask, verbose=self.verbose)
    dx_num = check_gradient.eval_numerical_gradient(
        lambda X: rnn_layers.temporal_softmax_loss(X, y, mask)[0],
        X, verbose=self.verbose)
    dx_err = error.rel_error(dx, dx_num)
    print('dx err : %f' % dx_err)
    self.assertLessEqual(dx_err, self.eps)
    print("======== TestCaptioningRNN.test_temporal_softmax: <END> ")

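# Not part of the original suite: a minimal sketch of the temporal softmax
# loss being checked above, assuming the cs231n conventions (loss summed over
# unmasked timesteps and divided by the batch size N).
def _temporal_softmax_loss_sketch(x, y, mask):
    N, T, V = x.shape
    x_flat = x.reshape(N * T, V)
    y_flat = y.reshape(N * T)
    mask_flat = mask.reshape(N * T)
    # Numerically stable softmax over the vocabulary axis
    probs = np.exp(x_flat - x_flat.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)
    # Cross-entropy at the target words, zeroed where mask is False
    loss = -np.sum(mask_flat * np.log(probs[np.arange(N * T), y_flat])) / N
    dx_flat = probs.copy()
    dx_flat[np.arange(N * T), y_flat] -= 1
    dx_flat /= N
    dx_flat *= mask_flat[:, None]
    return loss, dx_flat.reshape(N, T, V)
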
def test_step_forward(self):
    print("\n======== TestCaptioningRNN.test_step_forward:")
    N = 3
    D = 10
    H = 4
    X = np.linspace(-0.4, 0.7, num=N * D).reshape(N, D)
    prev_h = np.linspace(-0.2, 0.5, num=N * H).reshape(N, H)
    Wx = np.linspace(-0.1, 0.9, num=D * H).reshape(D, H)
    Wh = np.linspace(-0.3, 0.7, num=H * H).reshape(H, H)
    b = np.linspace(-0.2, 0.4, num=H)
    next_h, _ = rnn_layers.rnn_step_forward(X, prev_h, Wx, Wh, b)
    expected_next_h = np.asarray(
        [[-0.58172089, -0.50182032, -0.41232771, -0.31410098],
         [0.66854692, 0.79562378, 0.8775553, 0.92795967],
         [0.97934501, 0.99144213, 0.99646691, 0.99854353]])
    err = error.rel_error(expected_next_h, next_h)
    print('Relative error : %f' % err)
    self.assertLessEqual(err, self.eps)
    print("======== TestCaptioningRNN.test_step_forward: <END> ")

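# Not part of the original suite: a minimal sketch of the vanilla RNN step
# whose output the test above compares against expected_next_h.
def _rnn_step_forward_sketch(x, prev_h, Wx, Wh, b):
    # Single tanh recurrence: combine input and previous hidden state
    return np.tanh(x.dot(Wx) + prev_h.dot(Wh) + b)
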
def test_word_embedding_backward(self):
    print("\n======== TestCaptioningRNN.test_word_embedding_backward:")
    N = 2
    T = 3
    V = 5
    D = 3
    X = np.random.randint(V, size=(N, T))
    W = np.random.randn(V, D)
    out, cache = rnn_layers.word_embedding_forward(X, W)
    print('cache len : %d' % len(cache))
    dout = np.random.randn(*out.shape)
    dW = rnn_layers.word_embedding_backward(dout, cache)
    f = lambda W: rnn_layers.word_embedding_forward(X, W)[0]
    dW_num = check_gradient.eval_numerical_gradient_array(f, W, dout)
    dw_error = error.rel_error(dW, dW_num)
    print("dW error : %f" % dw_error)
    self.assertLessEqual(dw_error, self.eps)
    print("======== TestCaptioningRNN.test_word_embedding_backward: <END> ")

def test_dropout_backward(self):
    print("\n======== TestLayersDropout.test_dropout_backward:")
    N = 2
    D = 15
    H1 = 20
    H2 = 30
    C = 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N,))
    dropout_probs = [0.3, 0.6, 0.1]
    import pymllib.classifiers.fcnet as fcnet
    # Network params
    hidden_dims = [H1, H2]
    weight_scale = 5e-2
    for p in dropout_probs:
        print("Running check with dropout p = %f" % p)
        model = fcnet.FCNet(hidden_dims=hidden_dims,
                            input_dim=D,
                            num_classes=C,
                            dropout=p,
                            weight_scale=weight_scale,
                            seed=123,
                            dtype=np.float64)
        loss, grads = model.loss(X, y)
        print("Initial loss : %f" % loss)
        for n in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = check_gradient.eval_numerical_gradient(f, model.params[n])
            grad_error = error.rel_error(grad_num, grads[n])
            print("%s relative error : %.2e" % (n, grad_error))
    print("======== TestLayersDropout.test_dropout_backward: <END> ")

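# Not part of the original suite: a minimal sketch of inverted dropout in
# train mode, assuming p is the keep probability (some implementations treat
# it as the drop probability instead). Scaling by 1/p at train time keeps the
# expected activation unchanged, so no rescaling is needed at test time.
def _dropout_forward_train_sketch(x, p):
    mask = (np.random.rand(*x.shape) < p) / p   # keep each unit with prob p
    return x * mask, mask
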
def test_lstm_step_backward(self):
    print("\n======== TestCaptioningLSTM.test_lstm_step_backward:")
    N = 4
    D = 5
    H = 6
    X = np.random.randn(N, D)
    prev_h = np.random.randn(N, H)
    prev_c = np.random.randn(N, H)
    Wx = np.random.randn(D, 4 * H)
    Wh = np.random.randn(H, 4 * H)
    b = np.random.randn(4 * H)
    next_h, next_c, cache = rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)
    dnext_h = np.random.randn(*next_h.shape)
    dnext_c = np.random.randn(*next_c.shape)
    # Forward lambdas returning next_h ([0]) and next_c ([1]). The gradient
    # checker perturbs the array it is given in place, so each lambda sees
    # the perturbed values whether it uses its parameter or its closure.
    fx_h = lambda x: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[0]
    fh_h = lambda h: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[0]
    fc_h = lambda c: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[0]
    fWx_h = lambda Wx: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[0]
    fWh_h = lambda Wh: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[0]
    fb_h = lambda b: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[0]
    fx_c = lambda x: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[1]
    fh_c = lambda h: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[1]
    fc_c = lambda c: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[1]
    fWx_c = lambda Wx: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[1]
    fWh_c = lambda Wh: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[1]
    fb_c = lambda b: rnn_layers.lstm_step_forward(X, prev_h, prev_c, Wx, Wh, b)[1]
    # Evaluate gradients; the total gradient for each input is the sum of
    # the paths through next_h and next_c
    num_grad = check_gradient.eval_numerical_gradient_array
    dx_num = num_grad(fx_h, X, dnext_h) + num_grad(fx_c, X, dnext_c)
    dh_num = num_grad(fh_h, prev_h, dnext_h) + num_grad(fh_c, prev_h, dnext_c)
    dc_num = num_grad(fc_h, prev_c, dnext_h) + num_grad(fc_c, prev_c, dnext_c)
    dWx_num = num_grad(fWx_h, Wx, dnext_h) + num_grad(fWx_c, Wx, dnext_c)
    dWh_num = num_grad(fWh_h, Wh, dnext_h) + num_grad(fWh_c, Wh, dnext_c)
    db_num = num_grad(fb_h, b, dnext_h) + num_grad(fb_c, b, dnext_c)
    dx, dh, dc, dWx, dWh, db = rnn_layers.lstm_step_backward(dnext_h, dnext_c, cache)
    # Compute errors
    err = {}
    err['dx_err'] = error.rel_error(dx, dx_num)
    err['dh_err'] = error.rel_error(dh, dh_num)
    err['dc_err'] = error.rel_error(dc, dc_num)
    err['dWx_err'] = error.rel_error(dWx, dWx_num)
    err['dWh_err'] = error.rel_error(dWh, dWh_num)
    err['db_err'] = error.rel_error(db, db_num)
    for k, v in err.items():
        print("%s: %f" % (k, v))
    for k, v in err.items():
        self.assertLessEqual(v, self.eps)
    print("\n======== TestCaptioningLSTM.test_lstm_step_backward: <END>")
