Example #1
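# The snippets below assume numpy and the cs231n-style gradient-check
# helpers. As a reference, here is a minimal rel_error consistent with how
# it is used throughout (a sketch, not necessarily the original helper):
import numpy as np

def rel_error(x, y):
    """Max relative error, guarded against division by zero."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
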
#############################################################################
#                      Batch normalization: Backward
#############################################################################
# Gradient check batchnorm backward pass

N, D = 4, 5
x = 5 * np.random.randn(N, D) + 12
gamma = np.random.randn(D)
beta = np.random.randn(D)
dout = np.random.randn(N, D)

bn_param = {'mode': 'train'}
fx = lambda x: batchnorm_forward(x, gamma, beta, bn_param)[0]
fg = lambda a: batchnorm_forward(x, a, beta, bn_param)[0]
fb = lambda b: batchnorm_forward(x, gamma, b, bn_param)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
da_num = eval_numerical_gradient_array(fg, gamma, dout)
db_num = eval_numerical_gradient_array(fb, beta, dout)

_, cache = batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = batchnorm_backward(dout, cache)

print('dx error: ', rel_error(dx_num, dx))
print('dgamma error: ', rel_error(da_num, dgamma))
print('dbeta error: ', rel_error(db_num, dbeta))
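
# For reference, a staged backward pass consistent with the check above.
# The cache layout (x, x_hat, mu, var, eps, gamma) is an assumption about
# what batchnorm_forward stores; this is a sketch, not the assignment's code.
def batchnorm_backward_sketch(dout, cache):
    x, x_hat, mu, var, eps, gamma = cache
    N = x.shape[0]
    std = np.sqrt(var + eps)

    dbeta = dout.sum(axis=0)
    dgamma = (dout * x_hat).sum(axis=0)

    # Walk the computation graph back through x_hat, var, and mu to x.
    dx_hat = dout * gamma
    dvar = np.sum(dx_hat * (x - mu) * -0.5 * std**-3, axis=0)
    dmu = np.sum(-dx_hat / std, axis=0) + dvar * np.mean(-2.0 * (x - mu), axis=0)
    dx = dx_hat / std + dvar * 2.0 * (x - mu) / N + dmu / N
    return dx, dgamma, dbeta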

#############################################################################
#                Batch Normalization: alternative backward
#############################################################################
N, D = 100, 500
x = 5 * np.random.randn(N, D) + 12
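
# On paper the chain rule above collapses into a single expression for dx;
# a sketch of the fused "alternative" backward, same cache assumption:
def batchnorm_backward_alt_sketch(dout, cache):
    x, x_hat, mu, var, eps, gamma = cache
    dx_hat = dout * gamma
    dx = (dx_hat - dx_hat.mean(axis=0)
          - x_hat * (dx_hat * x_hat).mean(axis=0)) / np.sqrt(var + eps)
    return dx, (dout * x_hat).sum(axis=0), dout.sum(axis=0)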
Example #2
  correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                          [ 3.25553199,  3.5141327,   3.77273342]])

  # Compare your output with ours. The error should be around 1e-9.
  print('Testing affine_forward function:')
  print('difference: ', rel_error(out, correct_out))
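
  # For context, a minimal affine_forward consistent with the shapes used in
  # these checks: flatten each example to a row, then apply x.dot(w) + b.
  # A sketch under that assumption, not necessarily the assignment's code.
  def affine_forward_sketch(x, w, b):
      out = x.reshape(x.shape[0], -1).dot(w) + b
      return out, (x, w, b)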

  #############################################################################
  #                                  Affine layer: backward       
  #############################################################################
  x = np.random.randn(10, 2, 3)
  w = np.random.randn(6, 5)
  b = np.random.randn(5)
  dout = np.random.randn(10, 5)

  dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
  dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
  db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

  _, cache = affine_forward(x, w, b)          
  dx, dw, db = affine_backward(dout, cache)

  # The error should be around 1e-10
  print('Testing affine_backward function:')
  print('dx error: ', rel_error(dx_num, dx))
  print('dw error: ', rel_error(dw_num, dw))
  print('db error: ', rel_error(db_num, db))
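
  # The matching backward pass is three matrix products; a sketch assuming
  # the cache is (x, w, b) as stored above:
  def affine_backward_sketch(dout, cache):
      x, w, b = cache
      x_rows = x.reshape(x.shape[0], -1)
      dx = dout.dot(w.T).reshape(x.shape)  # restore the original input shape
      dw = x_rows.T.dot(dout)
      db = dout.sum(axis=0)
      return dx, dw, db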

  #############################################################################
  #                                  ReLU: forward
  #############################################################################
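  # The forward pass this section checks is an elementwise max; a sketch
  # (with the one-line backward for completeness, both assumptions):
  def relu_forward_sketch(x):
      out = np.maximum(0, x)
      return out, x  # cache the input for the backward pass

  def relu_backward_sketch(dout, cache):
      return dout * (cache > 0)  # gradient passes only where the input was positive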
Example #3
    h = np.random.randn(N, H)
    Wx = np.random.randn(D, H)
    Wh = np.random.randn(H, H)
    b = np.random.randn(H)

    out, cache = RNN_step_forward(x, h, Wx, Wh, b)

    dnext_h = np.random.randn(*out.shape)

    fx = lambda x: RNN_step_forward(x, h, Wx, Wh, b)[0]
    fh = lambda prev_h: RNN_step_forward(x, prev_h, Wx, Wh, b)[0]
    fWx = lambda Wx: RNN_step_forward(x, h, Wx, Wh, b)[0]
    fWh = lambda Wh: RNN_step_forward(x, h, Wx, Wh, b)[0]
    fb = lambda b: RNN_step_forward(x, h, Wx, Wh, b)[0]

    dx_num = eval_numerical_gradient_array(fx, x, dnext_h)
    dprev_h_num = eval_numerical_gradient_array(fh, h, dnext_h)
    dWx_num = eval_numerical_gradient_array(fWx, Wx, dnext_h)
    dWh_num = eval_numerical_gradient_array(fWh, Wh, dnext_h)
    db_num = eval_numerical_gradient_array(fb, b, dnext_h)

    dx, dprev_h, dWx, dWh, db = RNN_step_backward(dnext_h, cache)

    print('dx error: ', rel_error(dx_num, dx))
    print('dprev_h error: ', rel_error(dprev_h_num, dprev_h))
    print('dWx error: ', rel_error(dWx_num, dWx))
    print('dWh error: ', rel_error(dWh_num, dWh))
    print('db error: ', rel_error(db_num, db))
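
    # A tanh step consistent with the check above; the cache layout is an
    # assumption about what RNN_step_forward stores, so this is a sketch:
    def RNN_step_forward_sketch(x, prev_h, Wx, Wh, b):
        next_h = np.tanh(x.dot(Wx) + prev_h.dot(Wh) + b)
        return next_h, (x, prev_h, Wx, Wh, next_h)

    def RNN_step_backward_sketch(dnext_h, cache):
        x, prev_h, Wx, Wh, next_h = cache
        dz = dnext_h * (1 - next_h ** 2)  # tanh'(z) = 1 - tanh(z)^2
        return (dz.dot(Wx.T), dz.dot(Wh.T),
                x.T.dot(dz), prev_h.T.dot(dz), dz.sum(axis=0))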

    # Vanilla RNN: forward
    N, T, D, H = 2, 3, 4, 5
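
    # The full forward pass just iterates the step over time; a sketch
    # assuming x has shape (N, T, D) and the initial hidden state h0 (N, H):
    def RNN_forward_sketch(x, h0, Wx, Wh, b):
        N, T, _ = x.shape
        H = h0.shape[1]
        h = np.zeros((N, T, H))
        prev_h, caches = h0, []
        for t in range(T):
            prev_h, cache = RNN_step_forward_sketch(x[:, t, :], prev_h, Wx, Wh, b)
            h[:, t, :] = prev_h
            caches.append(cache)
        return h, caches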
Example #4
  print('Mean of input: ', x.mean())
  print('Mean of train-time output: ', out.mean())
  print('Mean of test-time output: ', out_test.mean())
  print('Fraction of train-time output set to zero: ', (out == 0).mean())
  print('Fraction of test-time output set to zero: ', (out_test == 0).mean())
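
# For reference, a minimal inverted-dropout forward consistent with the
# checks here, assuming p is the probability of *keeping* a unit (whether
# p means "keep" or "drop" varies between versions, hence the hedge):
def dropout_forward_sketch(x, dropout_param):
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])
    if mode == 'train':
        mask = (np.random.rand(*x.shape) < p) / p  # rescale so E[out] == E[x]
        out = x * mask
    else:
        mask, out = None, x  # test time: identity, no rescaling needed
    return out, (dropout_param, mask)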

#############################################################################
#                             Dropout backward pass
#############################################################################
x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)

dropout_param = {'mode': 'train', 'p': 0.8, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)
dx_num = eval_numerical_gradient_array(lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

print('dx relative error: ', rel_error(dx, dx_num))
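
# The backward pass only has to reuse the cached mask; a sketch matching the
# forward above:
def dropout_backward_sketch(dout, cache):
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask  # gradient flows only through the kept units
    return dout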

#############################################################################
#                             Fully-connected nets with Dropout
#############################################################################
N, D, H1, H2, C = 2, 15, 20, 30, 10
X = np.random.randn(N, D)
y = np.random.randint(C, size=(N,))

for dropout in [0, 0.25, 1.0]:
  print('Running check with dropout = ', dropout)
  model = FullyConnectedNet([H1, H2], input_dim=D, num_classes=C,
                            weight_scale=5e-2, dtype=np.float64,
                            dropout=dropout, seed=123)
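
  # The body of this check typically continues along these lines;
  # eval_numerical_gradient (the scalar-loss counterpart of the array
  # version used above) and its signature are assumptions:
  loss, grads = model.loss(X, y)
  print('Initial loss: ', loss)
  for name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))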
Example #5
plt.subplot(2, 3, 6)
imshow_noax(out[1, 1])

plt.show()

#############################################################################
#                             Convolution: Naive backward pass
#############################################################################
from time import time
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

dx_num = eval_numerical_gradient_array(
    lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(
    lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(
    lambda b: conv_forward_naive(x, w, b, conv_param)[0], b, dout)

out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Your errors should be around 1e-9
print('Testing conv_backward_naive function')
print('dx error: ', rel_error(dx, dx_num))
print('dw error: ', rel_error(dw, dw_num))
print('db error: ', rel_error(db, db_num))
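
# For reference, a naive backward pass consistent with the check above. The
# cache layout (x, w, b, conv_param) is an assumption; the loops favor
# clarity over speed, mirroring the naive forward pass.
def conv_backward_naive_sketch(dout, cache):
    x, w, b, conv_param = cache
    stride, pad = conv_param['stride'], conv_param['pad']
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, H_out, W_out = dout.shape

    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = dout.sum(axis=(0, 2, 3))  # bias touches every output position

    for n in range(N):
        for f in range(F):
            for i in range(H_out):
                for j in range(W_out):
                    h0, w0 = i * stride, j * stride
                    window = x_pad[n, :, h0:h0 + HH, w0:w0 + WW]
                    dw[f] += window * dout[n, f, i, j]
                    dx_pad[n, :, h0:h0 + HH, w0:w0 + WW] += w[f] * dout[n, f, i, j]
    dx = dx_pad[:, :, pad:H + pad, pad:W + pad]  # strip the padding back off
    return dx, dw, db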

#############################################################################