def test_fully_connected_forward():
    data, weights, bias, expected = fixed_case()
    layer = FullyConnected(4, 2)
    layer.weights = weights
    layer.bias = bias
    layer_output = layer(Graph(data))
    gradient_checker.assert_allclose(layer_output.data, expected)
def test_softmax_forward():
    data = np.random.uniform(-1, 1, (3, 10)).astype(constants.DTYPE)
    output = softmax(Graph(data)).data

    expected_output = np.exp(data)
    for i in range(len(output)):
        expected_output[i] /= expected_output[i].sum()
    gradient_checker.assert_allclose(output, expected_output)
def test_softmax_cross_entropy_backward():
    data, labels = get_data()
    gradient = init([2])
    loss_function = SoftmaxCrossEntropy()
    loss_function(Graph(data), Graph(labels))
    computed_gradient_data, computed_gradient_label = loss_function.backward(gradient)
    # no gradient flows into the integer labels
    assert computed_gradient_label is None

    f = lambda: loss_function.internal_forward((data, labels))
    numerical_gradient_data, _ = gradient_checker.compute_numerical_gradient(
        f, (data, labels), (gradient,), eps=1e-2)
    gradient_checker.assert_allclose(computed_gradient_data, numerical_gradient_data, atol=1e-4)
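# The backward tests in this file compare analytic gradients against
# gradient_checker.compute_numerical_gradient.  As a reference, here is a
# minimal central-difference sketch of what such a checker typically does;
# the helper name and the assumption that internal_forward returns a tuple
# of arrays are mine, not part of the library under test.
def _numerical_gradient_sketch(f, inputs, output_gradients, eps=1e-3):
    gradients = tuple(np.zeros_like(x) for x in inputs)
    for x, grad in zip(inputs, gradients):
        flat_x, flat_grad = x.ravel(), grad.ravel()  # views, so perturbations hit the originals
        for i in range(flat_x.size):
            original = flat_x[i]
            flat_x[i] = original + eps
            outputs_plus = f()
            flat_x[i] = original - eps
            outputs_minus = f()
            flat_x[i] = original
            # chain rule: weight each output difference by the upstream gradient
            flat_grad[i] = sum(
                ((plus - minus) * g).sum() / (2 * eps)
                for plus, minus, g in zip(outputs_plus, outputs_minus, output_gradients))
    return gradients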
def test_add_backward():
    data = init([2])
    data_copy = np.copy(data)
    gradient = init([1])
    add_function = Add()
    # run the forward pass first, as in the other backward tests
    add_function(Graph(data), Graph(data_copy))
    computed_gradients_1, computed_gradients_2 = add_function.backward(gradient)

    f = lambda: add_function.internal_forward((data, data_copy))
    numerical_gradients_1, numerical_gradients_2 = gradient_checker.compute_numerical_gradient(
        f, (data, data_copy), (gradient,))
    gradient_checker.assert_allclose(computed_gradients_1, numerical_gradients_1)
    gradient_checker.assert_allclose(computed_gradients_2, numerical_gradients_2)
def test_dropout_backward():
    data = get_data()
    gradient = np.random.random(data.shape).astype(constants.DTYPE)
    data_graph = Graph(data)
    dropout_function = Dropout(0.5)
    dropout_result = dropout_function(data_graph)
    computed_gradients, = dropout_function.backward(gradient)

    # _dropout is fed the creator so the numerical check runs with the same dropout mask
    f = lambda: _dropout(data, dropout_result.creator)
    numerical_gradients, = gradient_checker.compute_numerical_gradient(
        f, (data, ), (gradient, ), eps=0.1)
    gradient_checker.assert_allclose(computed_gradients, numerical_gradients)
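# A minimal sketch of the inverted-dropout forward/backward pair these dropout
# tests appear to exercise; the scaling by 1 / (1 - ratio) at train time is an
# assumption on my part, but it is consistent with test mode being the
# identity (see test_dropout_test_mode further down).
def _inverted_dropout_sketch(x, upstream_gradient, dropout_ratio=0.5, train=True):
    if not train or dropout_ratio == 0.:
        return x, upstream_gradient
    mask = (np.random.random(x.shape) >= dropout_ratio) / (1. - dropout_ratio)
    # the same mask scales the activations forward and the gradient backward
    return x * mask, upstream_gradient * mask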
def test_relu_backward():
    data = np.random.uniform(-1, 1, (5, 4)).astype(constants.DTYPE)
    gradient = np.random.random(data.shape).astype(constants.DTYPE)
    data_graph = Graph(data)
    relu_function = Relu()
    relu_function(data_graph)
    computed_gradients, = relu_function.backward(gradient)

    f = lambda: relu_function.internal_forward((data, ))
    numerical_gradients, = gradient_checker.compute_numerical_gradient(
        f, (data, ), (gradient, ))
    gradient_checker.assert_allclose(computed_gradients, numerical_gradients)
def test_softmax_backward():
    data = np.random.uniform(-1, 1, (3, 10)).astype(constants.DTYPE)
    gradient = np.random.uniform(-1, 1, (3, 10)).astype(constants.DTYPE)
    data_graph = Graph(data)
    softmax_function = Softmax()
    softmax_function(data_graph)
    computed_gradients, = softmax_function.backward(gradient)

    f = lambda: softmax_function.internal_forward((data, ))
    numerical_gradients, = gradient_checker.compute_numerical_gradient(
        f, (data, ), (gradient, ), eps=1e-2)
    gradient_checker.assert_allclose(computed_gradients, numerical_gradients)
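# A minimal sketch of the analytic gradient that test_softmax_backward checks
# numerically (assumption: Softmax normalizes each row, as test_softmax_forward
# implies).  For y = softmax(x) and upstream gradient g, the Jacobian-vector
# product is y * (g - sum(g * y)), taken row-wise; the helper name is mine.
def _softmax_backward_sketch(x, g):
    e = np.exp(x - x.max(axis=1, keepdims=True))  # shift for numerical stability
    y = e / e.sum(axis=1, keepdims=True)
    return y * (g - (g * y).sum(axis=1, keepdims=True))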
def test_sgd():
    learning_rate = 0.0017
    optimizer = SGD(learning_rate)
    gradients = init(
        [[0.78266141, 0.87160521, 0.91545263, 0.41808932, 0.63775016],
         [0.16893565, 0.25077806, 0.88390805, 0.92372049, 0.0741453],
         [0.63734837, 0.28873811, 0.20229677, 0.12343409, 0.08427269]])
    desired = init(
        [[0.00133052, 0.00148173, 0.00155627, 0.00071075, 0.00108418],
         [0.00028719, 0.00042632, 0.00150264, 0.00157032, 0.00012605],
         [0.00108349, 0.00049085, 0.0003439, 0.00020984, 0.00014326]])
    deltas, = optimizer.run_update_rule((gradients, ), None)
    gradient_checker.assert_allclose(deltas, desired)
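# The fixture above pins down the vanilla SGD update rule: each delta equals
# learning_rate * gradient (e.g. 0.0017 * 0.78266141 ~= 0.00133052).  A minimal
# sketch of such an update rule, assuming the returned deltas are later
# subtracted from the parameters (the sign convention is not fixed by the test):
def _sgd_update_rule_sketch(gradients, learning_rate):
    return tuple(learning_rate * g for g in gradients)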
def test_sum_backward():
    data = np.random.uniform(-1, 1, (3, 2)).astype(constants.DTYPE)
    gradient = init([2])
    data_graph = Graph(data)
    sum_function = Sum()
    sum_function(data_graph)
    computed_gradients, = sum_function.backward(gradient)

    f = lambda: sum_function.internal_forward((data, ))
    numerical_gradients, = gradient_checker.compute_numerical_gradient(
        f, (data, ), (gradient, ))
    gradient_checker.assert_allclose(computed_gradients, numerical_gradients, atol=1e-4, rtol=1e-3)
def test_mean_squared_error_backward_with_label():
    data, data_2 = fixed_case(with_label=True)
    gradients = init([2])
    data_1_graph = Graph(data)
    data_2_graph = Graph(data_2)
    mse_function = MeanSquaredError()
    mse_function(data_1_graph, data_2_graph)
    computed_gradient_1, computed_gradient_2 = mse_function.backward(gradients)
    assert computed_gradient_2 is None

    f = lambda: mse_function.internal_forward((data, data_2))
    numerical_gradient_1, _ = gradient_checker.compute_numerical_gradient(
        f, (data, data_2), (gradients, ))
    gradient_checker.assert_allclose(computed_gradient_1, numerical_gradient_1)
def test_fully_connected_backward():
    data = np.random.uniform(-1, 1, (10, 50)).astype(constants.DTYPE)
    gradient = np.full((10, 20), 2, dtype=constants.DTYPE)
    layer = FullyConnected(50, 20)
    comp_grad_x, comp_grad_weight, comp_grad_bias = layer.internal_backward((data,), (gradient,))

    f = lambda: layer.internal_forward((data,))
    num_grad_x, num_grad_weight, num_grad_bias = gradient_checker.compute_numerical_gradient(
        f, (data, layer._weights, layer.bias), (gradient,), eps=1e-2)
    gradient_checker.assert_allclose(comp_grad_x, num_grad_x, atol=1e-4)
    gradient_checker.assert_allclose(comp_grad_weight, num_grad_weight, atol=1e-4)
    gradient_checker.assert_allclose(comp_grad_bias, num_grad_bias, atol=1e-4)
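# A minimal sketch of the fully connected backward pass the test above checks,
# assuming the layout y = x @ W + b with W of shape (n_in, n_out); the actual
# FullyConnected implementation may store its weights transposed.
def _fully_connected_backward_sketch(x, weights, upstream_gradient):
    grad_x = upstream_gradient @ weights.T     # (batch, n_in)
    grad_weights = x.T @ upstream_gradient     # (n_in, n_out)
    grad_bias = upstream_gradient.sum(axis=0)  # (n_out,)
    return grad_x, grad_weights, grad_bias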
def test_graph_backward_with_layers():
    # use a fully connected layer and check whether the backward pass distributes the gradients correctly
    data = np.random.uniform(-1, 1, (2, 2)).astype(constants.DTYPE)
    labels = np.array([1, 1], dtype=np.int32)
    fc_layer = FullyConnected(2, 2)
    fc_layer.weights[...] = np.zeros_like(fc_layer.weights)
    fc_layer.bias[...] = np.array([-10, 10])

    def run_forward(inputs, labels):
        fc_result = fc_layer(inputs)
        loss = F.softmax_cross_entropy(fc_result, labels)
        return loss

    data_graph = Graph(data)
    label_graph = Graph(labels)
    loss = run_forward(data_graph, label_graph)
    optimizer = SGD(0.001)
    loss.backward(optimizer)
    assert label_graph.grad is None
    # with zero weights no gradient reaches the data, and since softmax([-10, 10])
    # already matches label 1 for both samples, the parameters stay (numerically) unchanged
    gradient_checker.assert_allclose(data_graph.grad, np.zeros_like(data_graph.grad))
    gradient_checker.assert_allclose(fc_layer.weights, np.zeros_like(fc_layer.weights))
    gradient_checker.assert_allclose(fc_layer.bias, np.array([-10, 10]))

    # change the labels and make sure that the gradients are different now;
    # the absolute values of the gradients of one sample shall be higher than those of the other sample
    labels = np.array([0, 1], dtype=np.int32)
    label_graph = Graph(labels)
    loss = run_forward(data_graph, label_graph)
    loss.backward(optimizer)
    assert label_graph.grad is None
    assert (np.abs(data_graph.grad[0]) > np.abs(data_graph.grad[1])).all()
def test_dropout_test_mode():
    data = get_data()
    dropout_result = dropout(Graph(data), dropout_ratio=0.5, train=False)
    # there should be no changes in test mode
    gradient_checker.assert_allclose(dropout_result.data, data)
def test_dropout_forward_ratio_0():
    data = get_data()
    result = dropout(Graph(data), dropout_ratio=0.)
    gradient_checker.assert_allclose(result.data, data)