Example #1
    def test_backprop(self):
        x = np.random.randn(self.N, self.img_height, self.img_width)
        w = np.random.randn(self.D, self.H)
        b = np.random.randn(self.H)
        grad_output = np.random.randn(self.N, self.H)

        # Numerical gradient w.r.t inputs
        num_grad_x = eval_numerical_gradient_array(f=lambda x: self.layer.forward_prop(x, w, b), 
                                                   x=x, 
                                                   df=grad_output)
        # Numerical gradient w.r.t weights
        num_grad_w = eval_numerical_gradient_array(f=lambda w: self.layer.forward_prop(x, w, b),
                                                   x=w, 
                                                   df=grad_output)

        # Numerical gradient w.r.t. biases
        num_grad_b = eval_numerical_gradient_array(f=lambda b: self.layer.forward_prop(x, w, b),
                                                   x=b, 
                                                   df=grad_output)

        # Compute gradients using backprop algorithm
        grad_x, grad_w, grad_b = self.layer.backprop(grad_output)

        np.testing.assert_array_almost_equal(num_grad_x, grad_x, decimal=7)
        np.testing.assert_array_almost_equal(num_grad_w, grad_w, decimal=7)
        np.testing.assert_array_almost_equal(num_grad_b, grad_b, decimal=7)
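
# Every example in this collection drives the same helper. A minimal sketch of a
# centered-difference eval_numerical_gradient_array, written as an assumption that is
# consistent with how the tests here call it (f maps an array to an array, df is the
# upstream gradient), not necessarily the exact library source:
import numpy as np

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """Numerically estimate the gradient of sum(f(x) * df) with respect to x."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h          # perturb this entry upward
        pos = f(x).copy()
        x[ix] = oldval - h          # perturb it downward
        neg = f(x).copy()
        x[ix] = oldval              # restore the original value
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad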
Example #2
    def test_backward(self):
        np.random.seed(271)

        N, T, H, M = 2, 3, 4, 5 

        # Create the layer
        layer = TemporalAffineLayer(H, M)
        W = np.random.randn(H, M)
        b = np.random.randn(M)

        # Create some arbitrary inputs
        x = np.random.randn(N, T, H)

        out = layer.forward(x, W=W, b=b)
        grad_out = np.random.randn(*out.shape)
        grad_x, grad_W, grad_b = layer.backward(grad_out)

        fx = lambda x: layer.forward(x, W=W, b=b)
        fw = lambda W: layer.forward(x, W=W, b=b)
        fb = lambda b: layer.forward(x, W=W, b=b)

        grad_x_num = eval_numerical_gradient_array(fx, x, grad_out)
        grad_W_num = eval_numerical_gradient_array(fw, W, grad_out)
        grad_b_num = eval_numerical_gradient_array(fb, b, grad_out)

        self.assertAlmostEqual(rel_error(grad_x_num, grad_x), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_W_num, grad_W), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_b_num, grad_b), 1e-9, places=2)
Example #3
 def loss(self, X, lag=None, phi=None, sigma=None, intercept=None):
     if lag is None:
         lag = self._lag
     if phi is None:
         phi = self.params['phi']
     if sigma is None:
         sigma = self.params['sigma']
     if intercept is None:
         intercept = self.params['intercept']
     loglikelihood = self.get_loglikelihood(X,
                                            lag=lag,
                                            phi=phi,
                                            sigma=sigma,
                                            intercept=intercept)
     """grad_phi is a column vector"""
     grads = {}
     grads['phi'] = eval_numerical_gradient_array(
         lambda phi: self.get_loglikelihood(X, lag, phi, sigma, intercept),
         phi, 1)
     grads['intercept'] = eval_numerical_gradient_array(
         lambda intercept: self.get_loglikelihood(X, lag, phi, sigma,
                                                  intercept), intercept, 1)
     grads['sigma'] = eval_numerical_gradient_array(
         lambda sigma: self.get_loglikelihood(X, lag, phi, sigma, intercept
                                              ), sigma, 1)
     return loglikelihood, grads
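Example #4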
    def test_linear_backward(self):
        np.random.seed(42)
        rel_error_max = 1e-5

        for test_num in range(10):
            N = np.random.choice(range(1, 20))
            D = np.random.choice(range(1, 100))
            C = np.random.choice(range(1, 10))
            x = np.random.randn(N, D)
            dout = np.random.randn(N, C)

            layer_params = {
                'input_size': D,
                'output_size': C,
                'reg': 0.0,
                'weight_scale': 0.001
            }
            layer = LinearLayer(layer_params)
            layer.initialize()
            layer.set_train_mode()

            out = layer.forward(x)
            dx = layer.backward(dout)
            dw = layer.grads['w']
            dx_num = eval_numerical_gradient_array(
                lambda xx: layer.forward(xx), x, dout)
            dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x),
                                                   layer.params['w'], dout)

            self.assertLess(rel_error(dx, dx_num), rel_error_max)
            self.assertLess(rel_error(dw, dw_num), rel_error_max)
Example #5
  def test_linear_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
      N = np.random.choice(range(1, 20))
      D = np.random.choice(range(1, 100))
      C = np.random.choice(range(1, 10))
      x = np.random.randn(N, D)
      dout = np.random.randn(N, C)
      layer = LinearModule(D, C)
      out = layer.forward(x)
      dx = layer.backward(dout)
      dw = layer.grads['weight']
      dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)
      dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x), layer.params['weight'], dout)
      self.assertLess(rel_error(dx, dx_num), rel_error_max)
      self.assertLess(rel_error(dw, dw_num), rel_error_max)
Example #6
    def test_backprop(self):
        x = np.random.randn(self.N, self.D)
        grad_output = np.random.randn(*x.shape)

        # Numerical gradient w.r.t inputs
        num_grad_x = eval_numerical_gradient_array(
            f=lambda x: self.layer.forward_prop(x), x=x, df=grad_output)

        # Compute gradients using backprop algorithm
        grad_x = self.layer.backprop(grad_output)

        np.testing.assert_array_almost_equal(num_grad_x, grad_x, decimal=7)
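Example #7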
  def test_linear_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
     
      N = np.random.choice(range(1, 20)) #batch size   
      D = np.random.choice(range(1, 100)) #num in_features
      C = np.random.choice(range(1, 10)) #num classes = out_features
      x = np.random.randn(N, D) #mini-batch
      dout = np.random.randn(N, C) #cross-entropy loss?

      layer = LinearModule(D, C)
      
      out = layer.forward(x) 
      dx = layer.backward(dout)
      dw = layer.grads['weight']
      dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)
      dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x), layer.params['weight'], dout)

      self.assertLess(rel_error(dx, dx_num), rel_error_max)
      self.assertLess(rel_error(dw, dw_num), rel_error_max)
Example #8
    def test_backward(self):
        np.random.seed(271)

        N, D, T, H = 2, 3, 10, 6

        # Create the layer
        layer = LSTMRecurrentLayer(D, H)
        Wx = np.random.randn(D, 4 * H)
        Wh = np.random.randn(H, 4 * H)
        b = np.random.randn(4 * H)

        # Create some arbitrary inputs
        x = np.random.randn(N, T, D)
        h0 = np.random.randn(N, H)

        hidden_state_over_time = layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)

        grad_h_over_time = np.random.randn(*hidden_state_over_time.shape)

        grad_x, grad_h0, grad_Wx, grad_Wh, grad_b = layer.backward(
            grad_h_over_time)

        fx = lambda x: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
        fh0 = lambda h0: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
        fWx = lambda Wx: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
        fWh = lambda Wh: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
        fb = lambda b: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)

        grad_x_num = eval_numerical_gradient_array(fx, x, grad_h_over_time)
        grad_h0_num = eval_numerical_gradient_array(fh0, h0, grad_h_over_time)
        grad_Wx_num = eval_numerical_gradient_array(fWx, Wx, grad_h_over_time)
        grad_Wh_num = eval_numerical_gradient_array(fWh, Wh, grad_h_over_time)
        grad_b_num = eval_numerical_gradient_array(fb, b, grad_h_over_time)

        self.assertAlmostEqual(rel_error(grad_x_num, grad_x), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_h0_num, grad_h0), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_Wx_num, grad_Wx), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_Wh_num, grad_Wh), 1e-9, places=2)
        self.assertAlmostEqual(rel_error(grad_b_num, grad_b), 1e-9, places=2)
Example #9
 def test_elu_backward(self):
     np.random.seed(42)
     rel_error_max = 1e-6
 
     for test_num in range(10):
         N = np.random.choice(range(1, 20))
         D = np.random.choice(range(1, 100))
         x = np.random.randn(N, D)
         dout = np.random.randn(*x.shape)
         layer = ELUModule()
         out = layer.forward(x)
         dx = layer.backward(dout)
         dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)
         self.assertLess(rel_error(dx, dx_num), rel_error_max)
Example #10
def affine_relu_check():
    np.random.seed(231)
    x = np.random.randn(2, 3, 4)
    w = np.random.randn(12, 10)
    b = np.random.randn(10)
    dout = np.random.randn(2, 10)

    out, cache = affine_relu_forward(x, w, b)
    dx, dw, db = affine_relu_backward(dout, cache)

    dx_num = eval_numerical_gradient_array(
        lambda x: affine_relu_forward(x, w, b)[0], x, dout)
    dw_num = eval_numerical_gradient_array(
        lambda w: affine_relu_forward(x, w, b)[0], w, dout)
    db_num = eval_numerical_gradient_array(
        lambda b: affine_relu_forward(x, w, b)[0], b, dout)

    # Relative error should be around e-10 or less
    print('Testing affine_relu_forward and affine_relu_backward:')
    print('dx error: ', rel_error(dx_num, dx))
    print('dw error: ', rel_error(dw_num, dw))
    print('db error: ', rel_error(db_num, db))
    pass
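Example #11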
  def test_linear_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-5

    for test_num in range(10):
      N = np.random.choice(range(1, 20))
      D = np.random.choice(range(1, 100))
      C = np.random.choice(range(1, 10))
      x = np.random.randn(N, D)
      dout = np.random.randn(N, C)

      layer_params = {'input_size': D, 'output_size': C, 'reg': 0.0, 'weight_scale': 0.001}
      layer = LinearLayer(layer_params)
      layer.initialize()
      layer.set_train_mode()

      out = layer.forward(x)
      dx = layer.backward(dout)
      dw = layer.grads['w']
      dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)
      dw_num = eval_numerical_gradient_array(lambda w: layer.forward(x), layer.params['w'], dout)

      self.assertLess(rel_error(dx, dx_num), rel_error_max)
      self.assertLess(rel_error(dw, dw_num), rel_error_max)
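Example #12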
  def test_relu_backward(self):
    np.random.seed(42)
    rel_error_max = 1e-6

    for test_num in range(10):
      N = np.random.choice(range(1, 20))
      D = np.random.choice(range(1, 100))
      x = np.random.randn(N, D)
      dout = np.random.randn(*x.shape)

      layer = ReLULayer()
      layer.initialize()
      layer.set_train_mode()

      out = layer.forward(x)
      dx = layer.backward(dout)
      dx_num = eval_numerical_gradient_array(lambda xx: layer.forward(xx), x, dout)

      self.assertLess(rel_error(dx, dx_num), rel_error_max)
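Example #13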
    def test_softmax_backward(self):
        np.random.seed(42)
        rel_error_max = 1e-5

        for test_num in range(10):
            N = np.random.choice(range(1, 20))
            D = np.random.choice(range(1, 100))
            x = np.random.randn(N, D)
            dout = np.random.randn(*x.shape)

            layer = SoftMaxLayer()
            layer.initialize()
            layer.set_train_mode()

            out = layer.forward(x)
            dx = layer.backward(dout)
            dx_num = eval_numerical_gradient_array(
                lambda xx: layer.forward(xx), x, dout)

            self.assertLess(rel_error(dx, dx_num), rel_error_max)
Example #14
    def test_backward(self):
        np.random.seed(271)

        N, T, V, D = 50, 3, 5, 6

        # Create the layer
        layer = WordEmbeddingLayer(V, D)
        layer.W = np.random.randn(V, D)

        # Create some arbitrary inputs
        x = np.random.randint(V, size=(N, T))

        out = layer.forward(x)

        grad_out = np.random.randn(*out.shape)

        grad_W = layer.backward(grad_out)

        f = lambda W: layer.forward(x, W=W)
        grad_W_num = eval_numerical_gradient_array(f, layer.W, grad_out)

        self.assertAlmostEqual(rel_error(grad_W_num, grad_W), 1e-9, places=2)
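Example #15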
###################################################################################
#   Affine layer: backward.                                                       #
###################################################################################
#   In the file layers.py implement the affine_backward function.                 #
#   Once you are done you can test your implementation using numeric gradient.    #
###################################################################################
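
# A minimal sketch of what affine_backward could look like, assuming affine_forward
# computes x.reshape(N, -1).dot(theta) + theta_0 and caches (x, theta, theta_0).
# The _sketch suffix marks this as an illustration, not the graded layers.py code.
def affine_backward_sketch(dout, cache):
    x, theta, theta_0 = cache
    x_flat = x.reshape(x.shape[0], -1)       # (N, D)
    dx = dout.dot(theta.T).reshape(x.shape)  # back to the input's shape
    dtheta = x_flat.T.dot(dout)              # (D, M)
    dtheta_0 = dout.sum(axis=0)              # (M,)
    return dx, dtheta, dtheta_0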

# Test the affine_backward function

x = np.random.randn(10, 2, 3)
theta = np.random.randn(6, 5)
theta_0 = np.random.randn(5)
dout = np.random.randn(10, 5)

if layers.affine_forward(x,theta,theta_0)[0] is not None:
  dx_num = eval_numerical_gradient_array(lambda x: layers.affine_forward(x, theta, theta_0)[0], x, dout)
  dtheta_num = eval_numerical_gradient_array(lambda theta: layers.affine_forward(x, theta, theta_0)[0], theta, dout)
  dtheta_0_num = eval_numerical_gradient_array(lambda b: layers.affine_forward(x, theta, theta_0)[0], theta_0, dout)

  _, cache = layers.affine_forward(x, theta, theta_0)
  dx, dtheta, dtheta_0 = layers.affine_backward(dout, cache)

  # The error should be around 1e-10
  print('Testing affine_backward function:')
  print('dx error (should be around 1e-10): ', rel_error(dx_num, dx))
  print('dtheta error (should be around 1e-10): ', rel_error(dtheta_num, dtheta))
  print('dtheta_0 error (should be around 1e-10): ', rel_error(dtheta_0_num, dtheta_0))


# Problem 3.1.3
###################################################################################
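Example #16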
#------------------------------------------------------------------------------
#%%
print('\n--------- affine_sigmoid_affine_backward test --------- ')
np.random.seed(231)
x = np.random.randn(10, 10)
w1 = np.random.randn(10, 5)
b1 = np.random.randn(5)
w2 = np.random.randn(5, 7)
b2 = np.random.randn(7)
dout = np.random.randn(10, 7)
dout1 = np.random.randn(10, 5)

out, cache = affine_sigmoid_affine_forward(x, w1, b1, w2, b2)
dx, dw1, db1, dw2, db2 = affine_sigmoid_affine_backward(dout, cache)
dx_num = eval_numerical_gradient_array(
    lambda x: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], x, dout)
dw1_num = eval_numerical_gradient_array(
    lambda w1: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], w1, dout)
db1_num = eval_numerical_gradient_array(
    lambda b1: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], b1, dout)
dw2_num = eval_numerical_gradient_array(
    lambda w2: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], w2, dout)
db2_num = eval_numerical_gradient_array(
    lambda b2: affine_sigmoid_affine_forward(x, w1, b1, w2, b2)[0], b2, dout)
#out, cache = affine_sigmoid_forward(x, w1, b1)
#dx, dw, db = affine_sigmoid_backward(dout1, cache)
#dx_num = eval_numerical_gradient_array(lambda x:
#                    affine_sigmoid_forward(x, w1, b1)[0], x, dout1)

print('\ndx_num: ', dx_num)
print('\ndx: ', dx)
Example #17
h = np.random.randn(N, H)
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)

out, cache = rnn_step_forward(x, h, Wx, Wh, b)

dnext_h = np.random.randn(*out.shape)

fx = lambda x: rnn_step_forward(x, h, Wx, Wh, b)[0]
fh = lambda prev_h: rnn_step_forward(x, h, Wx, Wh, b)[0]
fWx = lambda Wx: rnn_step_forward(x, h, Wx, Wh, b)[0]
fWh = lambda Wh: rnn_step_forward(x, h, Wx, Wh, b)[0]
fb = lambda b: rnn_step_forward(x, h, Wx, Wh, b)[0]

dx_num = eval_numerical_gradient_array(fx, x, dnext_h)
dprev_h_num = eval_numerical_gradient_array(fh, h, dnext_h)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dnext_h)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dnext_h)
db_num = eval_numerical_gradient_array(fb, b, dnext_h)

dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)

print('dx error: ', rel_error(dx_num, dx))
print('dprev_h error: ', rel_error(dprev_h_num, dprev_h))
print('dWx error: ', rel_error(dWx_num, dWx))
print('dWh error: ', rel_error(dWh_num, dWh))
print('db error: ', rel_error(db_num, db))

#Vanilla RNN: forward
N, T, D, H = 2, 3, 4, 5
Example #18
    print('Mean of test-time output: ', out_test.mean())
    print('Fraction of train-time output set to zero: ', (out == 0).mean())
    print('Fraction of test-time output set to zero: ', (out_test == 0).mean())
    print()

# # Dropout backward pass
# In the file `cs231n/layers.py`, implement the backward pass for dropout. After doing so, run the following cell to numerically gradient-check your implementation.
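
# For reference, a hedged sketch of an inverted-dropout forward/backward pair that a
# check like the one below would accept. It assumes dropout_param['p'] is the keep
# probability; that convention (and the _sketch names) is an assumption, not the
# actual cs231n/layers.py code.
def dropout_forward_sketch(x, dropout_param):
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])
    if mode == 'train':
        mask = (np.random.rand(*x.shape) < p) / p  # scale at train time (inverted dropout)
        out = x * mask
    else:
        mask = None
        out = x
    return out, (dropout_param, mask)

def dropout_backward_sketch(dout, cache):
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask
    return dout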

np.random.seed(231)
x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)

dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(
    lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print('dx relative error: ', rel_error(dx, dx_num))

# # Fully-connected nets with Dropout
# In the file `cs231n/classifiers/fc_net.py`, modify your implementation to use dropout. Specifically, if the constructor of the net receives a nonzero value for the `dropout` parameter, then the net should add dropout immediately after every ReLU nonlinearity. After doing so, run the following to numerically gradient-check your implementation.
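
# Illustration only: one way to splice dropout in right after a ReLU, reusing the
# affine_relu_* and dropout_* layer functions exercised elsewhere in this file. The
# affine_relu_dropout_* names are hypothetical, not part of fc_net.py.
def affine_relu_dropout_forward(x, w, b, dropout_param):
    a, fc_cache = affine_relu_forward(x, w, b)
    out, do_cache = dropout_forward(a, dropout_param)
    return out, (fc_cache, do_cache)

def affine_relu_dropout_backward(dout, cache):
    fc_cache, do_cache = cache
    da = dropout_backward(dout, do_cache)
    return affine_relu_backward(da, fc_cache)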

np.random.seed(231)
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N, ))

for dropout in [0, 0.25, 0.5]:
    print('Running check with dropout = ', dropout)
    model = FullyConnectedNet([H1, H2],
                              input_dim=D,
Example #19
from gradient_check import eval_numerical_gradient_array
import numpy as np
from layers import *
N = 2
D = 3
M = 4
x = np.random.normal(size=(N, D))
w = np.random.normal(size=(D, M))
b = np.random.normal(size=(M, ))
# dout = np.random.normal(size=(N, M))
dout = np.random.normal(size=(N, D))

# out, cache = affine_forward(x, w, b)
# f=lambda x: affine_forward(x, w, b)[0]
# grad = affine_backward(dout, cache)[0]
# ngrad = eval_numerical_gradient_array(f, x, dout)
#
# print(grad - ngrad)

out, cache = relu_forward(x)
f = lambda x: relu_forward(x)[0]
grad = relu_backward(dout, cache)
ngrad = eval_numerical_gradient_array(f, x, dout)
print(grad - ngrad)
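Example #20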
imshow_noax(out[1, 1])
plt.show()

# # Convolution: Naive backward pass
# Implement the backward pass for the convolution operation in the function `conv_backward_naive` in the file `cs231n/layers.py`. Again, you don't need to worry too much about computational efficiency.
#
# When you are done, run the following to check your backward pass with a numeric gradient check.
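
# A hedged sketch of a naive convolution backward pass, assuming conv_forward_naive
# caches (x, w, b, conv_param) with x: (N, C, H, W) and w: (F, C, HH, WW). The _sketch
# name marks it as an illustration, not the implementation being graded.
def conv_backward_naive_sketch(dout, cache):
    x, w, b, conv_param = cache
    stride, pad = conv_param['stride'], conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    H_out = 1 + (H + 2 * pad - HH) // stride
    W_out = 1 + (W + 2 * pad - WW) // stride

    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = dout.sum(axis=(0, 2, 3))  # each filter's bias sees every output position

    for n in range(N):
        for f in range(F):
            for i in range(H_out):
                for j in range(W_out):
                    window = x_pad[n, :, i*stride:i*stride+HH, j*stride:j*stride+WW]
                    dw[f] += window * dout[n, f, i, j]
                    dx_pad[n, :, i*stride:i*stride+HH, j*stride:j*stride+WW] += w[f] * dout[n, f, i, j]

    dx = dx_pad[:, :, pad:pad + H, pad:pad + W]
    return dx, dw, db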

np.random.seed(231)
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2, )
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-8
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))

# # Max pooling: Naive forward
Example #21
###################################################################################
# Implement the backward pass for the convolution operation in the                #
# function conv_backward_naive in the file layers.py. Again, you                  #
# don't need to worry too much about computational efficiency.  When you          #
# are done, run the following to check your backward pass with a numeric          #
# gradient check.                                                                 #
###################################################################################

x = np.random.randn(4, 3, 5, 5)
theta = np.random.randn(2, 3, 3, 3)
theta0 = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

if layers.conv_forward_naive(x,theta,theta0, conv_param)[0] is not None:
  dx_num = eval_numerical_gradient_array(lambda x: layers.conv_forward_naive(x, theta, theta0, conv_param)[0], x, dout)
  dtheta_num = eval_numerical_gradient_array(lambda theta: layers.conv_forward_naive(x, theta, theta0, conv_param)[0], theta, dout)
  dtheta0_num = eval_numerical_gradient_array(lambda theta0: layers.conv_forward_naive(x, theta, theta0, conv_param)[0], theta0, dout)

  out, cache = layers.conv_forward_naive(x, theta, theta0, conv_param)
  dx, dtheta, dtheta0 = layers.conv_backward_naive(dout, cache)

  # Your errors should be around 1e-9
  print('Testing conv_backward_naive function')
  print('dx error: ', rel_error(dx, dx_num))
  print('dtheta error: ', rel_error(dtheta, dtheta_num))
  print('dtheta0 error: ', rel_error(dtheta0, dtheta0_num))

# Problem 3.2.3
###################################################################################
# Max pooling: Naive forward                                                      #
gamma, beta = np.asarray([3, 4, 5]), np.asarray([6, 7, 8])
out, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)
print('After spatial batch normalization (nontrivial gamma, beta):')
print('  Shape: ', out.shape)
print('  Means: ', out.mean(axis=(0, 2, 3)))
print('  Stds: ', out.std(axis=(0, 2, 3)))
'''

##############backward##############

np.random.seed(231)
N, C, H, W = 2, 3, 4, 5
x = 5 * np.random.randn(N, C, H, W) + 12
gamma = np.random.randn(C)
beta = np.random.randn(C)
dout = np.random.randn(N, C, H, W)

bn_param = {'mode': 'train'}
fx = lambda x: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]
fg = lambda a: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]
fb = lambda b: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
da_num = eval_numerical_gradient_array(fg, gamma, dout)
db_num = eval_numerical_gradient_array(fb, beta, dout)

_, cache = spatial_batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = spatial_batchnorm_backward(dout, cache)
print('dx error: ', rel_error(dx_num, dx))
print('dgamma error: ', rel_error(da_num, dgamma))
print('dbeta error: ', rel_error(db_num, dbeta))
                         [[-0.14526316, -0.13052632],
                          [-0.08631579, -0.07157895]],
                         [[-0.02736842, -0.01263158],
                          [ 0.03157895,  0.04631579]]],
                        [[[ 0.09052632,  0.10526316],
                          [ 0.14947368,  0.16421053]],
                         [[ 0.20842105,  0.22315789],
                          [ 0.26736842,  0.28210526]],
                         [[ 0.32631579,  0.34105263],
                          [ 0.38526316,  0.4       ]]]])

# Compare your output with ours. Difference should be around 1e-8.
print('Testing max_pool_forward_naive function:')
print('difference: ', rel_error(out, correct_out))
'''
#######################################backward###########################################

np.random.seed(231)
x = np.random.randn(3, 2, 8, 8)
dout = np.random.randn(3, 2, 4, 4)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

dx_num = eval_numerical_gradient_array(lambda x: max_pool_forward_naive(x, pool_param)[0], x, dout)

out, cache = max_pool_forward_naive(x, pool_param)
dx = max_pool_backward_naive(dout, cache)

# Your error should be around 1e-12
print('Testing max_pool_backward_naive function:')
print('dx error: ', rel_error(dx, dx_num))
'''
Example #24
def gradient_check(X, model, y):
    loss, grads = chess_convnet(X, model, y)
    dx_num = eval_numerical_gradient_array(lambda x: chess_convnet(x, model)[1]['W1'], X, grads['W1'])
    return rel_error(dx_num, grads['W1'])
Example #25
import numpy as np
from gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from layers import *

# Test the affine_backward function

x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x,
                                       dout)
Example #26
out, _ = affine_forward(x, w, b)
correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                        [ 3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around e-9 or less.
print('Testing affine_forward function:')
print(rel_error(out, correct_out)<1e-8)

np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)


# The error should be around e-10 or less
print('Testing affine_backward function:')
print('dx error: ', rel_error(dx_num, dx)<=1e-9)
print('dw error: ', rel_error(dw_num, dw)<=1e-9)
print('db error: ', rel_error(db_num, db)<=1e-9)


# Test the relu_forward function
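Example #27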
                        [ 3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around 1e-9.
print('Testing affine_forward function:')
print('difference: ', rel_maxError(out, correct_out))
#------------------------------------------------------------------------------
#%%
# Test the affine_backward function
print('\n--------- affine_backward test --------- ')
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around 1e-10
print('Testing affine_backward function:')
print('dx error: ', rel_maxError(dx_num, dx))
print('dw error: ', rel_maxError(dw_num, dw))
print('db error: ', rel_maxError(db_num, db))
#------------------------------------------------------------------------------
#%%
print('\n--------- affine_backward test --------- ')
np.random.seed(231)
Example #28
Wh = np.random.randn(H, 4 * H)
b = np.random.randn(4 * H)

out, cache = lstm_forward(x, h0, Wx, Wh, b)

dout = np.random.randn(*out.shape)

dx, dh0, dWx, dWh, db = lstm_backward(dout, cache)

fx = lambda x: lstm_forward(x, h0, Wx, Wh, b)[0]
fh0 = lambda h0: lstm_forward(x, h0, Wx, Wh, b)[0]
fWx = lambda Wx: lstm_forward(x, h0, Wx, Wh, b)[0]
fWh = lambda Wh: lstm_forward(x, h0, Wx, Wh, b)[0]
fb = lambda b: lstm_forward(x, h0, Wx, Wh, b)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
dh0_num = eval_numerical_gradient_array(fh0, h0, dout)
dWx_num = eval_numerical_gradient_array(fWx, Wx, dout)
dWh_num = eval_numerical_gradient_array(fWh, Wh, dout)
db_num = eval_numerical_gradient_array(fb, b, dout)

print('dx error: ', rel_error(dx_num, dx))
print('dh0 error: ', rel_error(dh0_num, dh0))
print('dWx error: ', rel_error(dWx_num, dWx))
print('dWh error: ', rel_error(dWh_num, dWh))
print('db error: ', rel_error(db_num, db))

#LSTM captioning model: test loss
N, D, W, H = 10, 20, 30, 40
word_to_idx = {'<NULL>': 0, 'cat': 2, 'dog': 3}
V = len(word_to_idx)
Example #29
from gradient_check import eval_numerical_gradient_array
from layers import conv_backward_naive, conv_forward_naive, max_pool_backward_naive, max_pool_forward_naive
import numpy as np


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2, )
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-9
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))

pass