def test_lstm_backward() -> None:
    """Gradient-check the full LSTM layer against numerical estimates."""
    N, D, T, H = 2, 3, 10, 6
    x = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)
    Wx = np.random.randn(D, 4 * H)
    Wh = np.random.randn(H, 4 * H)
    b = np.random.randn(4 * H)

    lstm = LSTM(D, H, N)
    lstm.h0, lstm.Wx, lstm.Wh, lstm.b = h0, Wx, Wh, b

    out = lstm(x)
    dout = np.random.randn(*out.shape)
    dx, dh0, dWx, dWh, db = lstm.backward(dout)

    params = {"h0": h0, "Wx": Wx, "Wh": Wh, "b": b}
    dx_num, dh0_num, dWx_num, dWh_num, db_num = estimate_gradients(
        lstm, dout, x, params
    )

    assert np.allclose(dx_num, dx), f"dx error: {rel_error(dx_num, dx)}"
    assert np.allclose(dh0_num, dh0), f"dh0 error: {rel_error(dh0_num, dh0)}"
    assert np.allclose(dWx_num, dWx), f"dWx error: {rel_error(dWx_num, dWx)}"
    assert np.allclose(dWh_num, dWh), f"dWh error: {rel_error(dWh_num, dWh)}"
    assert np.allclose(db_num, db), f"db error: {rel_error(db_num, db)}"
def test_relu_backward() -> None:
    """Gradient-check the ReLU layer against a numerical estimate."""
    inputs = np.random.randn(10, 10)
    upstream = np.random.randn(*inputs.shape)
    relu = ReLU()
    (dx_expected,) = estimate_gradients(relu, upstream, inputs, {})
    _ = relu(inputs)  # forward pass populates the cache backward() needs
    dx_actual = relu.backward(upstream)
    assert np.allclose(dx_expected, dx_actual)
def test_maxpool2d_backward() -> None:
    """Gradient-check MaxPool2d (2x2 window, stride 2) numerically."""
    inputs = np.random.randn(3, 2, 4, 4)
    upstream = np.random.randn(3, 2, 2, 2)
    pool = MaxPool2d(kernel_size=(2, 2), stride=2)
    (dx_expected,) = estimate_gradients(pool, upstream, inputs, {})
    _ = pool(inputs)  # forward pass populates the cache backward() needs
    dx_actual = pool.backward(upstream)
    assert np.allclose(dx_expected, dx_actual)
def test_dropout_backward() -> None:
    """Gradient-check Dropout; both RNGs are seeded so the mask is reproducible."""
    np.random.seed(231)
    inputs = np.random.randn(10, 10) + 10
    upstream = np.random.randn(*inputs.shape)
    dropout = Dropout(p=0.2, seed=123)
    _ = dropout(inputs)
    dx_actual = dropout.backward(upstream)
    (dx_expected,) = estimate_gradients(dropout, upstream, inputs, {})
    assert np.allclose(dx_actual, dx_expected)
def test_embedding_backward() -> None:
    """Gradient-check the Embedding layer's weight gradient."""
    N, T, V, D = 50, 3, 5, 6
    indices = np.random.randint(V, size=(N, T))
    W = np.random.randn(V, D)
    upstream = np.random.randn(N, T, D)
    embed = Embedding(V, D)
    _, dW_expected = estimate_gradients(embed, upstream, indices, {"W": W})
    _ = embed(indices)  # forward pass populates the cache backward() needs
    dW_actual = embed.backward(upstream)
    assert np.allclose(dW_actual, dW_expected)
def test_conv2d_backward() -> None:
    """Gradient-check Conv2d: analytic backward vs numerical gradients.

    BUG FIX: the layer was constructed as ``Conv2d(1, 2, kernel_size=(4, 4))``
    even though the weight array has shape (2, 3, 3, 3) — i.e. 3 input
    channels and a 3x3 kernel. The constructor arguments now match the
    parameter arrays the check actually uses.
    """
    x = np.random.randn(3, 3, 3, 3)     # (N, C_in, H, W)
    w = np.random.randn(2, 3, 3, 3)     # (C_out, C_in, kH, kW)
    b = np.random.randn(2)
    dout = np.random.randn(3, 2, 3, 3)  # pad=1, stride=1 keeps 3x3 spatial
    model = Conv2d(3, 2, kernel_size=(3, 3), stride=1, pad=1)
    params = {"w": w, "b": b}
    dx_num, dw_num, db_num = estimate_gradients(model, dout, x, params)
    _ = model(x)  # forward pass populates the cache backward() needs
    dx, dw, db = model.backward(dout)
    assert np.allclose(dx_num, dx)
    assert np.allclose(dw_num, dw)
    assert np.allclose(db_num, db)
def test_linear_backward() -> None:
    """Gradient-check the Linear layer's backward pass.

    BUG FIX: the layer was constructed as ``Linear(10, 5)``, but x of shape
    (10, 2, 3) flattens to 6 features per sample and w has shape (6, 5), so
    ``in_features`` must be 6 — 10 is the batch size, not a feature dim.
    """
    x = np.random.randn(10, 2, 3)  # flattens to (10, 6) inside the layer
    w = np.random.randn(6, 5)
    b = np.random.randn(5)
    dout = np.random.randn(10, 5)
    model = Linear(6, 5)
    params = {"w": w, "b": b}
    dx_num, dw_num, db_num = estimate_gradients(model, dout, x, params)
    _ = model(x)  # forward pass populates the cache backward() needs
    dx, dw, db = model.backward(dout)
    assert np.allclose(dx_num, dx)
    assert np.allclose(dw_num, dw)
    assert np.allclose(db_num, db)
def test_temporal_linear_backward() -> None:
    """Gradient-check TemporalLinear over an (N, T, D) input sequence."""
    N, T, D, M = 2, 3, 4, 5
    seq = np.random.randn(N, T, D)
    w = np.random.randn(D, M)
    b = np.random.randn(M)
    upstream = np.random.randn(N, T, M)
    layer = TemporalLinear(D, M)
    dx_num, dw_num, db_num = estimate_gradients(
        layer, upstream, seq, {"w": w, "b": b}
    )
    _ = layer(seq)  # forward pass populates the cache backward() needs
    dx, dw, db = layer.backward(upstream)
    for numeric, analytic in ((dx_num, dx), (dw_num, dw), (db_num, db)):
        assert np.allclose(numeric, analytic)
def test_spatial_batchnorm_backward() -> None:
    """Gradient-check SpatialBatchNorm on a shifted, scaled random input."""
    N, C, H, W = 2, 3, 4, 5
    x = 5 * np.random.randn(N, C, H, W) + 12
    gamma = np.random.randn(C)
    beta = np.random.randn(C)
    upstream = np.random.randn(N, C, H, W)
    bn = SpatialBatchNorm(C)
    dx_num, dgamma_num, dbeta_num = estimate_gradients(
        bn, upstream, x, {"gamma": gamma, "beta": beta}
    )
    _ = bn(x)  # forward pass populates the cache backward() needs
    dx, dgamma, dbeta = bn.backward(upstream)
    for numeric, analytic in ((dx_num, dx), (dgamma_num, dgamma), (dbeta_num, dbeta)):
        assert np.allclose(numeric, analytic)
def test_spatial_groupnorm_backward() -> None:
    """Gradient-check SpatialGroupNorm (6 channels split into 2 groups)."""
    N, C, H, W, G = 2, 6, 4, 5, 2
    x = 5 * np.random.randn(N, C, H, W) + 12
    gamma = np.random.randn(1, C, 1, 1)
    beta = np.random.randn(1, C, 1, 1)
    upstream = np.random.randn(N, C, H, W)
    gn = SpatialGroupNorm(C, G)
    dx_num, dgamma_num, dbeta_num = estimate_gradients(
        gn, upstream, x, {"gamma": gamma, "beta": beta}
    )
    _ = gn(x)  # forward pass populates the cache backward() needs
    dx, dgamma, dbeta = gn.backward(upstream)
    for numeric, analytic in ((dx_num, dx), (dgamma_num, dgamma), (dbeta_num, dbeta)):
        assert np.allclose(numeric, analytic)
def test_batchnorm_backward_naive() -> None:
    """Gradient-check BatchNorm's naive (unoptimized) backward pass."""
    N, D = 4, 5
    x = 5 * np.random.randn(N, D) + 12
    gamma = np.random.randn(D)
    beta = np.random.randn(D)
    upstream = np.random.randn(N, D)
    bn = BatchNorm(D)
    dx_num, dgamma_num, dbeta_num = estimate_gradients(
        bn, upstream, x, {"gamma": gamma, "beta": beta}
    )
    _ = bn(x)  # forward pass populates the cache backward_naive() needs
    dx, dgamma, dbeta = bn.backward_naive(upstream)
    for numeric, analytic in ((dx_num, dx), (dgamma_num, dgamma), (dbeta_num, dbeta)):
        assert np.allclose(numeric, analytic)
def test_rnn_cell_backward() -> None:
    """Gradient-check one RNN step w.r.t. its input and every parameter."""
    N, D, H = 4, 5, 6
    x = np.random.randn(N, D)
    prev_h = np.random.randn(N, H)
    Wx = np.random.randn(D, H)
    Wh = np.random.randn(H, H)
    b = np.random.randn(H)
    dnext_h = np.random.randn(*prev_h.shape)
    cell = RNNCell(prev_h=prev_h, Wx=Wx, Wh=Wh, b=b)
    grads_expected = estimate_gradients(
        cell, dnext_h, x, {"prev_h": prev_h, "Wx": Wx, "Wh": Wh, "b": b}
    )
    _ = cell(x)  # forward pass populates the cache backward() needs
    grads_actual = cell.backward(dnext_h)
    # Order: dx, dprev_h, dWx, dWh, db
    for numeric, analytic in zip(grads_expected, grads_actual):
        assert np.allclose(numeric, analytic)