def test_backprop(self):
    x = np.random.randn(self.N, self.img_height, self.img_width)
    w = np.random.randn(self.D, self.H)
    b = np.random.randn(self.H)
    grad_output = np.random.randn(self.N, self.H)

    # Numerical gradient w.r.t. inputs
    num_grad_x = eval_numerical_gradient_array(
        f=lambda x: self.layer.forward_prop(x, w, b), x=x, df=grad_output)

    # Numerical gradient w.r.t. weights
    num_grad_w = eval_numerical_gradient_array(
        f=lambda w: self.layer.forward_prop(x, w, b), x=w, df=grad_output)

    # Numerical gradient w.r.t. biases
    num_grad_b = eval_numerical_gradient_array(
        f=lambda b: self.layer.forward_prop(x, w, b), x=b, df=grad_output)

    # Compute gradients using the backprop algorithm
    grad_x, grad_w, grad_b = self.layer.backprop(grad_output)

    np.testing.assert_array_almost_equal(num_grad_x, grad_x, decimal=7)
    np.testing.assert_array_almost_equal(num_grad_w, grad_w, decimal=7)
    np.testing.assert_array_almost_equal(num_grad_b, grad_b, decimal=7)
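# The snippets throughout this section lean on the same centered-difference
# utility. As a reference point, here is a minimal sketch of what a
# CS231n-style eval_numerical_gradient_array(f, x, df) helper typically does;
# treat it as an assumption about the imported utility, not its exact source.
import numpy as np

def eval_numerical_gradient_array_sketch(f, x, df, h=1e-5):
    """Estimate d(sum(f(x) * df)) / dx by perturbing x one entry at a time."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()          # f sees the perturbed array (in-place edit)
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval             # restore the original value
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad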
def test_backward(self):
    np.random.seed(271)
    N, T, H, M = 2, 3, 4, 5

    # Create the layer
    layer = TemporalAffineLayer(H, M)
    W = np.random.randn(H, M)
    b = np.random.randn(M)

    # Create some arbitrary inputs
    x = np.random.randn(N, T, H)

    out = layer.forward(x, W=W, b=b)
    grad_out = np.random.randn(*out.shape)
    grad_x, grad_W, grad_b = layer.backward(grad_out)

    fx = lambda x: layer.forward(x, W=W, b=b)
    fw = lambda W: layer.forward(x, W=W, b=b)
    fb = lambda b: layer.forward(x, W=W, b=b)

    grad_x_num = eval_numerical_gradient_array(fx, x, grad_out)
    grad_W_num = eval_numerical_gradient_array(fw, W, grad_out)
    grad_b_num = eval_numerical_gradient_array(fb, b, grad_out)

    self.assertAlmostEqual(rel_error(grad_x_num, grad_x), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_W_num, grad_W), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_b_num, grad_b), 1e-9, places=2)
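# For context, a minimal sketch of the backward pass this test exercises,
# assuming the layer caches x and W from the forward pass and computes
# out[n, t] = x[n, t] @ W + b (names here are illustrative, not the layer's API).
import numpy as np

def temporal_affine_backward_sketch(grad_out, x, W):
    """x: (N, T, H), W: (H, M), grad_out: (N, T, M)."""
    N, T, H = x.shape
    M = W.shape[1]
    grad_x = grad_out.reshape(N * T, M).dot(W.T).reshape(N, T, H)
    grad_W = x.reshape(N * T, H).T.dot(grad_out.reshape(N * T, M))
    grad_b = grad_out.sum(axis=(0, 1))
    return grad_x, grad_W, grad_b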
def loss(self, X, lag=None, phi=None, sigma=None, intercept=None):
    if lag is None:
        lag = self._lag
    if phi is None:
        phi = self.params['phi']
    if sigma is None:
        sigma = self.params['sigma']
    if intercept is None:
        intercept = self.params['intercept']

    loglikelihood = self.get_loglikelihood(
        X, lag=lag, phi=phi, sigma=sigma, intercept=intercept)

    # grad_phi is a column vector
    grads = {}
    grads['phi'] = eval_numerical_gradient_array(
        lambda phi: self.get_loglikelihood(X, lag, phi, sigma, intercept),
        phi, 1)
    grads['intercept'] = eval_numerical_gradient_array(
        lambda intercept: self.get_loglikelihood(X, lag, phi, sigma, intercept),
        intercept, 1)
    grads['sigma'] = eval_numerical_gradient_array(
        lambda sigma: self.get_loglikelihood(X, lag, phi, sigma, intercept),
        sigma, 1)

    return loglikelihood, grads
def test_linear_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
        N = np.random.choice(range(1, 20))
        D = np.random.choice(range(1, 100))
        C = np.random.choice(range(1, 10))
        x = np.random.randn(N, D)
        dout = np.random.randn(N, C)

        layer_params = {
            'input_size': D,
            'output_size': C,
            'reg': 0.0,
            'weight_scale': 0.001
        }
        layer = LinearLayer(layer_params)
        layer.initialize()
        layer.set_train_mode()

        out = layer.forward(x)
        dx = layer.backward(dout)
        dw = layer.grads['w']

        dx_num = eval_numerical_gradient_array(
            lambda xx: layer.forward(xx), x, dout)
        # The lambda can ignore its argument: the checker perturbs the array
        # layer.params['w'] in place, so each forward call sees the perturbation.
        dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x),
                                               layer.params['w'], dout)

        self.assertLess(rel_error(dx, dx_num), rel_error_max)
        self.assertLess(rel_error(dw, dw_num), rel_error_max)
def test_linear_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
        N = np.random.choice(range(1, 20))
        D = np.random.choice(range(1, 100))
        C = np.random.choice(range(1, 10))
        x = np.random.randn(N, D)
        dout = np.random.randn(N, C)

        layer = LinearModule(D, C)
        out = layer.forward(x)
        dx = layer.backward(dout)
        dw = layer.grads['weight']

        dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)
        dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x),
                                               layer.params['weight'], dout)

        self.assertLess(rel_error(dx, dx_num), rel_error_max)
        self.assertLess(rel_error(dw, dw_num), rel_error_max)
def test_backprop(self):
    x = np.random.randn(self.N, self.D)
    grad_output = np.random.randn(*x.shape)

    # Numerical gradient w.r.t. inputs
    num_grad_x = eval_numerical_gradient_array(
        f=lambda x: self.layer.forward_prop(x), x=x, df=grad_output)

    # Compute gradients using the backprop algorithm
    grad_x = self.layer.backprop(grad_output)

    np.testing.assert_array_almost_equal(num_grad_x, grad_x, decimal=7)
def test_linear_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
        N = np.random.choice(range(1, 20))    # batch size
        D = np.random.choice(range(1, 100))   # number of in_features
        C = np.random.choice(range(1, 10))    # number of classes = out_features
        x = np.random.randn(N, D)             # mini-batch of inputs
        dout = np.random.randn(N, C)          # upstream gradient, e.g. from a cross-entropy loss

        layer = LinearModule(D, C)
        out = layer.forward(x)
        dx = layer.backward(dout)
        dw = layer.grads['weight']

        dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)
        dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x),
                                               layer.params['weight'], dout)

        self.assertLess(rel_error(dx, dx_num), rel_error_max)
        self.assertLess(rel_error(dw, dw_num), rel_error_max)
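# A minimal sketch of the gradients this test expects, assuming the module
# stores its weight with shape (D, C) and computes out = x @ weight + bias;
# if LinearModule stores the weight transposed, the transposes swap accordingly.
import numpy as np

def linear_backward_sketch(dout, x, weight):
    dx = dout.dot(weight.T)      # (N, D): gradient w.r.t. the inputs
    dweight = x.T.dot(dout)      # (D, C): gradient w.r.t. the weights
    dbias = dout.sum(axis=0)     # (C,):  gradient w.r.t. the bias
    return dx, dweight, dbias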
def test_backward(self):
    np.random.seed(271)
    N, D, T, H = 2, 3, 10, 6

    # Create the layer
    layer = LSTMRecurrentLayer(D, H)
    Wx = np.random.randn(D, 4 * H)
    Wh = np.random.randn(H, 4 * H)
    b = np.random.randn(4 * H)

    # Create some arbitrary inputs
    x = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)

    hidden_state_over_time = layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    grad_h_over_time = np.random.randn(*hidden_state_over_time.shape)
    grad_x, grad_h0, grad_Wx, grad_Wh, grad_b = layer.backward(grad_h_over_time)

    fx = lambda x: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    fh0 = lambda h0: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    fWx = lambda Wx: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    fWh = lambda Wh: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    fb = lambda b: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)

    grad_x_num = eval_numerical_gradient_array(fx, x, grad_h_over_time)
    grad_h0_num = eval_numerical_gradient_array(fh0, h0, grad_h_over_time)
    grad_Wx_num = eval_numerical_gradient_array(fWx, Wx, grad_h_over_time)
    grad_Wh_num = eval_numerical_gradient_array(fWh, Wh, grad_h_over_time)
    grad_b_num = eval_numerical_gradient_array(fb, b, grad_h_over_time)

    self.assertAlmostEqual(rel_error(grad_x_num, grad_x), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_h0_num, grad_h0), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_Wx_num, grad_Wx), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_Wh_num, grad_Wh), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_b_num, grad_b), 1e-9, places=2)
def test_elu_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-6

    for test_num in range(10):
        N = np.random.choice(range(1, 20))
        D = np.random.choice(range(1, 100))
        x = np.random.randn(N, D)
        dout = np.random.randn(*x.shape)

        layer = ELUModule()
        out = layer.forward(x)
        dx = layer.backward(dout)

        dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)

        self.assertLess(rel_error(dx, dx_num), rel_error_max)
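# For reference, the derivative the analytic ELU backward pass should match,
# assuming the standard ELU f(x) = x for x > 0 and alpha * (exp(x) - 1)
# otherwise (names here are illustrative, not ELUModule's API).
import numpy as np

def elu_backward_sketch(dout, x, alpha=1.0):
    return dout * np.where(x > 0, 1.0, alpha * np.exp(x))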
def affine_relu_check():
    np.random.seed(231)
    x = np.random.randn(2, 3, 4)
    w = np.random.randn(12, 10)
    b = np.random.randn(10)
    dout = np.random.randn(2, 10)

    out, cache = affine_relu_forward(x, w, b)
    dx, dw, db = affine_relu_backward(dout, cache)

    dx_num = eval_numerical_gradient_array(
        lambda x: affine_relu_forward(x, w, b)[0], x, dout)
    dw_num = eval_numerical_gradient_array(
        lambda w: affine_relu_forward(x, w, b)[0], w, dout)
    db_num = eval_numerical_gradient_array(
        lambda b: affine_relu_forward(x, w, b)[0], b, dout)

    # Relative errors should be around e-10 or less
    print('Testing affine_relu_forward and affine_relu_backward:')
    print('dx error: ', rel_error(dx_num, dx))
    print('dw error: ', rel_error(dw_num, dw))
    print('db error: ', rel_error(db_num, db))
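# affine_relu_backward is conventionally just the chain rule over the two
# cached sub-layers; a minimal sketch, assuming relu_backward and
# affine_backward exist and cache = (fc_cache, relu_cache) as saved by the
# forward pass.
def affine_relu_backward_sketch(dout, cache):
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)         # undo the ReLU gate first
    dx, dw, db = affine_backward(da, fc_cache)   # then the affine transform
    return dx, dw, db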
def test_linear_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
        N = np.random.choice(range(1, 20))
        D = np.random.choice(range(1, 100))
        C = np.random.choice(range(1, 10))
        x = np.random.randn(N, D)
        dout = np.random.randn(N, C)

        layer_params = {'input_size': D, 'output_size': C,
                        'reg': 0.0, 'weight_scale': 0.001}
        layer = LinearLayer(layer_params)
        layer.initialize()
        layer.set_train_mode()

        out = layer.forward(x)
        dx = layer.backward(dout)
        dw = layer.grads['w']

        dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)
        dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x),
                                               layer.params['w'], dout)

        self.assertLess(rel_error(dx, dx_num), rel_error_max)
        self.assertLess(rel_error(dw, dw_num), rel_error_max)
def test_relu_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-6

    for test_num in range(10):
        N = np.random.choice(range(1, 20))
        D = np.random.choice(range(1, 100))
        x = np.random.randn(N, D)
        dout = np.random.randn(*x.shape)

        layer = ReLULayer()
        layer.initialize()
        layer.set_train_mode()

        out = layer.forward(x)
        dx = layer.backward(dout)

        dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)

        self.assertLess(rel_error(dx, dx_num), rel_error_max)
def test_softmax_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
        N = np.random.choice(range(1, 20))
        D = np.random.choice(range(1, 100))
        x = np.random.randn(N, D)
        dout = np.random.randn(*x.shape)

        layer = SoftMaxLayer()
        layer.initialize()
        layer.set_train_mode()

        out = layer.forward(x)
        dx = layer.backward(dout)

        dx_num = eval_numerical_gradient_array(
            lambda xx: layer.forward(xx), x, dout)

        self.assertLess(rel_error(dx, dx_num), rel_error_max)
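# The analytic softmax backward being checked here is a Jacobian-vector
# product that only needs the cached forward output; a minimal sketch,
# assuming out = softmax(x) applied row-wise (names illustrative).
import numpy as np

def softmax_backward_sketch(dout, out):
    return out * (dout - np.sum(dout * out, axis=1, keepdims=True))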
def test_backward(self):
    np.random.seed(271)
    N, T, V, D = 50, 3, 5, 6

    # Create the layer
    layer = WordEmbeddingLayer(V, D)
    layer.W = np.random.randn(V, D)

    # Create some arbitrary inputs
    x = np.random.randint(V, size=(N, T))

    out = layer.forward(x)
    grad_out = np.random.randn(*out.shape)
    grad_W = layer.backward(grad_out)

    f = lambda W: layer.forward(x, W=W)
    grad_W_num = eval_numerical_gradient_array(f, layer.W, grad_out)

    self.assertAlmostEqual(rel_error(grad_W_num, grad_W), 1e-9, places=2)
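# The embedding backward pass is a scatter-add of the upstream gradients onto
# the rows that were looked up; a minimal sketch, assuming x holds integer
# word indices of shape (N, T) and W is the (V, D) embedding matrix.
import numpy as np

def word_embedding_backward_sketch(grad_out, x, W):
    grad_W = np.zeros_like(W)
    np.add.at(grad_W, x, grad_out)   # accumulates correctly for repeated indices
    return grad_W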
###################################################################################
# Affine layer: backward.                                                         #
###################################################################################
# In the file layers.py implement the affine_backward function.                  #
# Once you are done you can test your implementation using a numeric gradient.   #
###################################################################################

# Test the affine_backward function
x = np.random.randn(10, 2, 3)
theta = np.random.randn(6, 5)
theta_0 = np.random.randn(5)
dout = np.random.randn(10, 5)

if layers.affine_forward(x, theta, theta_0)[0] is not None:
    dx_num = eval_numerical_gradient_array(
        lambda x: layers.affine_forward(x, theta, theta_0)[0], x, dout)
    dtheta_num = eval_numerical_gradient_array(
        lambda theta: layers.affine_forward(x, theta, theta_0)[0], theta, dout)
    dtheta_0_num = eval_numerical_gradient_array(
        lambda b: layers.affine_forward(x, theta, theta_0)[0], theta_0, dout)

    _, cache = layers.affine_forward(x, theta, theta_0)
    dx, dtheta, dtheta_0 = layers.affine_backward(dout, cache)

    # The errors should be around 1e-10
    print('Testing affine_backward function:')
    print('dx error (should be around 1e-10): ', rel_error(dx_num, dx))
    print('dtheta error (should be around 1e-10): ', rel_error(dtheta_num, dtheta))
    print('dtheta_0 error (should be around 1e-10): ', rel_error(dtheta_0_num, dtheta_0))

# Problem 3.1.3
###################################################################################
#------------------------------------------------------------------------------
#%%
print('\n--------- affine_sigmoid_affine_backward test --------- ')
np.random.seed(231)
x = np.random.randn(10, 10)
w1 = np.random.randn(10, 5)
b1 = np.random.randn(5)
w2 = np.random.randn(5, 7)
b2 = np.random.randn(7)
dout = np.random.randn(10, 7)
dout1 = np.random.randn(10, 5)

out, cache = affine_sigmoid_affine_forward(x, w1, b1, w2, b2)
dx, dw1, db1, dw2, db2 = affine_sigmoid_affine_backward(dout, cache)

dx_num = eval_numerical_gradient_array(
    lambda x: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], x, dout)
dw1_num = eval_numerical_gradient_array(
    lambda w1: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], w1, dout)
db1_num = eval_numerical_gradient_array(
    lambda b1: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], b1, dout)
dw2_num = eval_numerical_gradient_array(
    lambda w2: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], w2, dout)
db2_num = eval_numerical_gradient_array(
    lambda b2: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], b2, dout)

# out, cache = affine_sigmoid_forward(x, w1, b1)
# dx, dw, db = affine_sigmoid_backward(dout1, cache)
# dx_num = eval_numerical_gradient_array(
#     lambda x: affine_sigmoid_forward(x, w1, b1)[0], x, dout1)

print('\ndx_num: ', dx_num)
print('\ndx: ', dx)
h = np.random.randn(N, H)
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)

out, cache = rnn_step_forward(x, h, Wx, Wh, b)
dnext_h = np.random.randn(*out.shape)

fx = lambda x: rnn_step_forward(x, h, Wx, Wh, b)[0]
fh = lambda prev_h: rnn_step_forward(x, h, Wx, Wh, b)[0]
fWx = lambda Wx: rnn_step_forward(x, h, Wx, Wh, b)[0]
fWh = lambda Wh: rnn_step_forward(x, h, Wx, Wh, b)[0]
fb = lambda b: rnn_step_forward(x, h, Wx, Wh, b)[0]

dx_num = eval_numerical_gradient_array(fx, x, dnext_h)
dprev_h_num = eval_numerical_gradient_array(fh, h, dnext_h)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dnext_h)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dnext_h)
db_num = eval_numerical_gradient_array(fb, b, dnext_h)

dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)

print('dx error: ', rel_error(dx_num, dx))
print('dprev_h error: ', rel_error(dprev_h_num, dprev_h))
print('dWx error: ', rel_error(dWx_num, dWx))
print('dWh error: ', rel_error(dWh_num, dWh))
print('db error: ', rel_error(db_num, db))

# Vanilla RNN: forward
N, T, D, H = 2, 3, 4, 5
print('Mean of test-time output: ', out_test.mean())
print('Fraction of train-time output set to zero: ', (out == 0).mean())
print('Fraction of test-time output set to zero: ', (out_test == 0).mean())
print()

# # Dropout backward pass
# In the file `cs231n/layers.py`, implement the backward pass for dropout.
# After doing so, run the following cell to numerically gradient-check your
# implementation.

np.random.seed(231)
x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)

dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(
    lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print('dx relative error: ', rel_error(dx, dx_num))

# # Fully-connected nets with Dropout
# In the file `cs231n/classifiers/fc_net.py`, modify your implementation to use
# dropout. Specifically, if the constructor of the net receives a nonzero value
# for the `dropout` parameter, then the net should add dropout immediately after
# every ReLU nonlinearity. After doing so, run the following to numerically
# gradient-check your implementation.

np.random.seed(231)
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

for dropout in [0, 0.25, 0.5]:
    print('Running check with dropout = ', dropout)
    model = FullyConnectedNet([H1, H2], input_dim=D,
from gradient_check import eval_numerical_gradient_array
import numpy as np
from layers import *

N = 2
D = 3
M = 4

x = np.random.normal(size=(N, D))
w = np.random.normal(size=(D, M))
b = np.random.normal(size=(M,))
# dout = np.random.normal(size=(N, M))
dout = np.random.normal(size=(N, D))

# out, cache = affine_forward(x, w, b)
# f = lambda x: affine_forward(x, w, b)[0]
# grad = affine_backward(dout, cache)[0]
# ngrad = eval_numerical_gradient_array(f, x, dout)
#
# print(grad - ngrad)

out, cache = relu_forward(x)
f = lambda x: relu_forward(x)[0]
grad = relu_backward(dout, cache)
ngrad = eval_numerical_gradient_array(f, x, dout)

print(grad - ngrad)
imshow_noax(out[1, 1])
plt.show()

# # Convolution: Naive backward pass
# Implement the backward pass for the convolution operation in the function
# `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need
# to worry too much about computational efficiency.
#
# When you are done, run the following to check your backward pass with a
# numeric gradient check.

np.random.seed(231)
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-8
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))

# # Max pooling: Naive forward
###################################################################################
# Implement the backward pass for the convolution operation in the              #
# function conv_backward_naive in the file layers.py. Again, you                #
# don't need to worry too much about computational efficiency. When you         #
# are done, run the following to check your backward pass with a numeric        #
# gradient check.                                                                #
###################################################################################

x = np.random.randn(4, 3, 5, 5)
theta = np.random.randn(2, 3, 3, 3)
theta0 = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

if layers.conv_forward_naive(x, theta, theta0, conv_param)[0] is not None:
    dx_num = eval_numerical_gradient_array(
        lambda x: layers.conv_forward_naive(x, theta, theta0, conv_param)[0], x, dout)
    dtheta_num = eval_numerical_gradient_array(
        lambda theta: layers.conv_forward_naive(x, theta, theta0, conv_param)[0], theta, dout)
    dtheta0_num = eval_numerical_gradient_array(
        lambda theta0: layers.conv_forward_naive(x, theta, theta0, conv_param)[0], theta0, dout)

    out, cache = layers.conv_forward_naive(x, theta, theta0, conv_param)
    dx, dtheta, dtheta0 = layers.conv_backward_naive(dout, cache)

    # Your errors should be around 1e-9
    print('Testing conv_backward_naive function')
    print('dx error: ', rel_error(dx, dx_num))
    print('dtheta error: ', rel_error(dtheta, dtheta_num))
    print('dtheta0 error: ', rel_error(dtheta0, dtheta0_num))

# Problem 3.2.3
###################################################################################
# Max pooling: Naive forward                                                      #
gamma, beta = np.asarray([3, 4, 5]), np.asarray([6, 7, 8])
out, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)
print('After spatial batch normalization (nontrivial gamma, beta):')
print(' Shape: ', out.shape)
print(' Means: ', out.mean(axis=(0, 2, 3)))
print(' Stds: ', out.std(axis=(0, 2, 3)))
'''

############## backward ##############
np.random.seed(231)
N, C, H, W = 2, 3, 4, 5
x = 5 * np.random.randn(N, C, H, W) + 12
gamma = np.random.randn(C)
beta = np.random.randn(C)
dout = np.random.randn(N, C, H, W)

bn_param = {'mode': 'train'}
fx = lambda x: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]
fg = lambda a: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]
fb = lambda b: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
da_num = eval_numerical_gradient_array(fg, gamma, dout)
db_num = eval_numerical_gradient_array(fb, beta, dout)

_, cache = spatial_batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = spatial_batchnorm_backward(dout, cache)

print('dx error: ', rel_error(dx_num, dx))
print('dgamma error: ', rel_error(da_num, dgamma))
print('dbeta error: ', rel_error(db_num, dbeta))
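# spatial_batchnorm_backward is usually implemented by folding the spatial
# dimensions into the batch dimension and reusing the vanilla per-feature
# batchnorm backward; a minimal sketch, assuming a batchnorm_backward(dout, cache)
# helper exists and cache came from the matching reshaped forward pass.
import numpy as np

def spatial_batchnorm_backward_sketch(dout, cache):
    N, C, H, W = dout.shape
    dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)   # (N*H*W, C)
    dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache)
    dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return dx, dgamma, dbeta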
        [[-0.14526316, -0.13052632],
         [-0.08631579, -0.07157895]],
        [[-0.02736842, -0.01263158],
         [ 0.03157895,  0.04631579]]],
       [[[ 0.09052632,  0.10526316],
         [ 0.14947368,  0.16421053]],
        [[ 0.20842105,  0.22315789],
         [ 0.26736842,  0.28210526]],
        [[ 0.32631579,  0.34105263],
         [ 0.38526316,  0.4       ]]]])

# Compare your output with ours. Difference should be around 1e-8.
print('Testing max_pool_forward_naive function:')
print('difference: ', rel_error(out, correct_out))
'''

####################################### backward ###########################################
np.random.seed(231)
x = np.random.randn(3, 2, 8, 8)
dout = np.random.randn(3, 2, 4, 4)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

dx_num = eval_numerical_gradient_array(
    lambda x: max_pool_forward_naive(x, pool_param)[0], x, dout)

out, cache = max_pool_forward_naive(x, pool_param)
dx = max_pool_backward_naive(dout, cache)

# Your error should be around 1e-12
print('Testing max_pool_backward_naive function:')
print('dx error: ', rel_error(dx, dx_num))
'''
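# Max pooling routes each upstream gradient to the argmax position of its
# window; a minimal sketch of a naive backward pass, assuming
# cache = (x, pool_param) as saved by max_pool_forward_naive.
import numpy as np

def max_pool_backward_naive_sketch(dout, cache):
    x, pool_param = cache
    N, C, H, W = x.shape
    ph, pw = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    H_out = 1 + (H - ph) // stride
    W_out = 1 + (W - pw) // stride

    dx = np.zeros_like(x)
    for n in range(N):
        for c in range(C):
            for i in range(H_out):
                for j in range(W_out):
                    hs, ws = i * stride, j * stride
                    window = x[n, c, hs:hs + ph, ws:ws + pw]
                    mask = (window == window.max())     # winner(s) of the window
                    dx[n, c, hs:hs + ph, ws:ws + pw] += mask * dout[n, c, i, j]
    return dx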
def gradient_check(X, model, y):
    # Numerically check the analytic gradient w.r.t. W1: perturb model['W1']
    # in place, treat the scalar loss as the output, and chain with df = 1.
    loss, grads = chess_convnet(X, model, y)
    f = lambda W1: chess_convnet(X, model, y)[0]
    dW1_num = eval_numerical_gradient_array(f, model['W1'], 1.0)
    return rel_error(dW1_num, grads['W1'])
import numpy as np

from gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from layers import *

# Test the affine_backward function
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
out, _ = affine_forward(x, w, b)
correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                        [ 3.25553199,  3.5141327 ,  3.77273342]])

# Compare your output with ours. The error should be around e-9 or less.
print('Testing affine_forward function:')
print(rel_error(out, correct_out) < 1e-8)

np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around e-10 or less; each check prints True when the
# relative error is within tolerance.
print('Testing affine_backward function:')
print('dx error within tolerance: ', rel_error(dx_num, dx) <= 1e-9)
print('dw error within tolerance: ', rel_error(dw_num, dw) <= 1e-9)
print('db error within tolerance: ', rel_error(db_num, db) <= 1e-9)

# Test the relu_forward function
                        [ 3.25553199,  3.5141327 ,  3.77273342]])

# Compare your output with ours. The error should be around 1e-9.
print('Testing affine_forward function:')
print('difference: ', rel_maxError(out, correct_out))

#------------------------------------------------------------------------------
#%%
# Test the affine_backward function
print('\n--------- affine_backward test --------- ')
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around 1e-10
print('Testing affine_backward function:')
print('dx error: ', rel_maxError(dx_num, dx))
print('dw error: ', rel_maxError(dw_num, dw))
print('db error: ', rel_maxError(db_num, db))

#------------------------------------------------------------------------------
#%%
print('\n--------- affine_backward test --------- ')
np.random.seed(231)
Wh = np.random.randn(H, 4 * H)
b = np.random.randn(4 * H)

out, cache = lstm_forward(x, h0, Wx, Wh, b)

dout = np.random.randn(*out.shape)
dx, dh0, dWx, dWh, db = lstm_backward(dout, cache)

fx = lambda x: lstm_forward(x, h0, Wx, Wh, b)[0]
fh0 = lambda h0: lstm_forward(x, h0, Wx, Wh, b)[0]
fWx = lambda Wx: lstm_forward(x, h0, Wx, Wh, b)[0]
fWh = lambda Wh: lstm_forward(x, h0, Wx, Wh, b)[0]
fb = lambda b: lstm_forward(x, h0, Wx, Wh, b)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
dh0_num = eval_numerical_gradient_array(fh0, h0, dout)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dout)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dout)
db_num = eval_numerical_gradient_array(fb, b, dout)

print('dx error: ', rel_error(dx_num, dx))
print('dh0 error: ', rel_error(dh0_num, dh0))
print('dWx error: ', rel_error(dWx_num, dWx))
print('dWh error: ', rel_error(dWh_num, dWh))
print('db error: ', rel_error(db_num, db))

# LSTM captioning model: test loss
N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
import numpy as np

from gradient_check import eval_numerical_gradient_array
from layers import (conv_backward_naive, conv_forward_naive,
                    max_pool_backward_naive, max_pool_forward_naive)


def rel_error(x, y):
    """Returns the maximum relative error between x and y."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-9
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))
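# For context, a minimal sketch of a naive conv backward pass, assuming
# cache = (x, w, b, conv_param) as saved by the forward pass; every output
# position contributes to dw and scatters w back into dx.
import numpy as np

def conv_backward_naive_sketch(dout, cache):
    x, w, b, conv_param = cache
    stride, pad = conv_param['stride'], conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, H_out, W_out = dout.shape

    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = dout.sum(axis=(0, 2, 3))

    for n in range(N):
        for f in range(F):
            for i in range(H_out):
                for j in range(W_out):
                    hs, ws = i * stride, j * stride
                    window = x_pad[n, :, hs:hs + HH, ws:ws + WW]
                    dw[f] += window * dout[n, f, i, j]
                    dx_pad[n, :, hs:hs + HH, ws:ws + WW] += w[f] * dout[n, f, i, j]

    dx = dx_pad[:, :, pad:pad + H, pad:pad + W]
    return dx, dw, db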