def test_sigmoid():
    x = np.array([1, 1.1, 1.2, 5.5])
    v = Variable(x)
    sig = Sigmoid(v)
    np.testing.assert_allclose(
        sig.forward(),
        [0.73105858, 0.75026011, 0.76852478, 0.99592986],
        rtol=1e-5)
    sig.backward(np.ones(4))
    np.testing.assert_allclose(
        v.get_gradient(),
        [0.19661193, 0.18736987, 0.17789444, 0.00405357],
        rtol=1e-5)
def test_reduce_mean_splitter_broadcasting(self):
    x = np.arange(6).reshape(3, 2)
    w = np.arange(6, 8).reshape(2, 1)
    b = 12.0
    y = np.arange(8, 11).reshape(3, 1)
    dl_mse = 11.0
    x_variable = Variable(x)
    w_variable = Variable(w)
    b_variable = Variable(b)
    y_variable = Variable(y)
    xw_node = Multiply(x_variable, w_variable)
    xwb_node = Add(xw_node, b_variable)
    xwb_mse_node = MSEWithSplitter(y_variable, xwb_node)
    xwb_mse_desired = mean_squared_error(y, (x @ w) + np.full((3, 1), b))
    xwb_mean_actual = xwb_mse_node.forward()
    np.testing.assert_allclose(xwb_mean_actual, xwb_mse_desired)
    xwb_mse_node.backward(dl_mse)
    dl_db_actual = b_variable.get_gradient()
    dl_db_desired = dl_mse * 2.0 * np.sum((x @ w) + np.full((3, 1), b) - y) / x.shape[0]
    np.testing.assert_allclose(dl_db_actual, dl_db_desired)
    dl_dx = x_variable.get_gradient()
    dl_dw = w_variable.get_gradient()
def _build_architecture_get_prediction_and_regularization_cost(
        architecture, weight_decay, current_input):
    architecture_built = list()
    regularization_cost = Variable(0.0)
    weight_decay_variable = Variable(weight_decay)  # TODO: constant
    previous_layer_output = architecture[0]['input']

    for layer_dictionary in architecture:
        assert previous_layer_output == layer_dictionary["input"], \
            'Inconsistent architecture: can not feed {} outputs to {} inputs'.format(
                previous_layer_output, layer_dictionary['input'])
        activation_function = activation_function_name_to_class[
            layer_dictionary["nonlinear"]]
        regularization_method = regularization_method_name_to_class[
            layer_dictionary["regularization"]]
        layer = FullyConnectedLayer(layer_dictionary["input"],
                                    layer_dictionary["output"],
                                    activation_function,
                                    current_input)
        regularization_cost = Add(
            regularization_cost,
            Multiply(weight_decay_variable,
                     regularization_method(layer.get_weight())))
        architecture_built.append(layer)
        current_input = layer
        previous_layer_output = layer_dictionary['output']

    return architecture_built, current_input, regularization_cost
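# A hypothetical architecture the builder above would accept, as a sketch only:
# the dictionary keys ('input', 'output', 'nonlinear', 'regularization') come
# from the function itself, but the string values are assumptions — the exact
# names depend on activation_function_name_to_class and
# regularization_method_name_to_class, which are not shown here.
example_architecture = [
    {'input': 784, 'output': 128, 'nonlinear': 'relu', 'regularization': 'l2'},
    {'input': 128, 'output': 10, 'nonlinear': 'softmax', 'regularization': 'l2'},
]
# layers, prediction, reg_cost = _build_architecture_get_prediction_and_regularization_cost(
#     example_architecture, weight_decay=1e-4, current_input=Variable(None))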
def test_l2(self):
    x = np.random.rand(50, 30) - 0.5
    v = Variable(x)
    l2 = L2(v)
    np.testing.assert_allclose(l2.forward(), np.sum(np.abs(x) ** 2), rtol=1e-5)
    l2.backward(1)
    np.testing.assert_allclose(v.get_gradient(), 2 * x, rtol=1e-5)
def test_identity():
    x = np.random.rand(10)
    v = Variable(x)
    none = Identity(v)
    np.testing.assert_allclose(none.forward(), x, rtol=1e-5)
    none.backward(x)
    np.testing.assert_equal(v.get_gradient(), x)
def test_l1(self):
    x = np.random.rand(50, 30) - 0.5
    v = Variable(x)
    l1 = L1(v)
    np.testing.assert_allclose(l1.forward(), np.sum(np.abs(x)), rtol=1e-5)
    l1.backward(1)
    np.testing.assert_equal(v.get_gradient(), np.sign(x))
def test_forward(self):
    y_true = np.array([[0.0, 1.0],
                       [1.0, 0.0],
                       [0.0, 1.0]])
    y_predicted = np.arange(1, 7).reshape(3, 2)
    y_predicted = y_predicted / np.tile(np.sum(y_predicted, axis=1).reshape(3, 1), (1, 2))
    y_true_variable = Variable(y_true)
    y_predicted_variable = Variable(y_predicted)
    ce_node = CrossEntropy(y_true_variable, y_predicted_variable)
    ce_actual = ce_node.forward()
    ce_desired = log_loss(y_true, y_predicted)
    self.assertAlmostEqual(ce_actual, ce_desired)
    dl_dce = 2.0
    ce_node.backward(dl_dce)
    dl_dyp_actual = y_predicted_variable.get_gradient()
    dl_dyp_desired = -dl_dce * (y_true / y_predicted) / y_true.shape[0]
    np.testing.assert_allclose(dl_dyp_actual, dl_dyp_desired)
def test_forward_backward_1_no_activation(self):
    x = np.arange(6).reshape(3, 2)
    x_variable = Variable(x)
    fc = FullyConnectedLayer(2, 1, Identity, x_variable)
    w = fc._w._value.copy()
    b = fc._b._value.copy()
    wxb_desired = x @ w + b
    wxb_actual = fc.forward()
    np.testing.assert_almost_equal(wxb_actual, wxb_desired)
    fc.backward(np.array([[6.0], [7.0], [8.0]]))
    dl_dw_actual = fc._w.get_gradient()
    dl_dx_actual = x_variable.get_gradient()
    dl_dw_desired = np.array([[0 * 6 + 2 * 7 + 4 * 8],
                              [1 * 6 + 3 * 7 + 5 * 8]])
    dl_dx_desired = np.array([[w[0, 0] * 6, w[1, 0] * 6],
                              [w[0, 0] * 7, w[1, 0] * 7],
                              [w[0, 0] * 8, w[1, 0] * 8]])
    np.testing.assert_allclose(dl_dw_actual, dl_dw_desired)
    np.testing.assert_allclose(dl_dx_actual, dl_dx_desired)
    dl_db_actual = fc._b.get_gradient()
    dl_db_desired = np.array([6 + 7 + 8])
    np.testing.assert_allclose(dl_db_actual, dl_db_desired)
def test_transpose(self):
    x = np.random.rand(5, 3)
    v = Variable(x)
    t = Transpose(v)
    np.testing.assert_allclose(t.forward(), x.T)
    grads = np.random.rand(3, 5)
    t.backward(grads)
    np.testing.assert_allclose(v.get_gradient(), grads.T)
def test_reduce_mean_merged(self):
    # Array
    y = np.array([-0.5, 1, 2.5])
    v2 = Variable(y)
    m = ReduceMean(v2, 0)
    np.testing.assert_allclose(m.forward(), 1.0, rtol=1e-5)
    m.backward(1.0)
    np.testing.assert_equal(v2.get_gradient(), [1 / 3, 1 / 3, 1 / 3])
def test_splitter_forward(self):
    y_true = np.array([[1], [2], [3], [4]])
    y_predicted = np.array([[8], [7], [6], [5]])
    mse_desired = mean_squared_error(y_true, y_predicted)
    y_true_variable = Variable(y_true)
    y_predicted_variable = Variable(y_predicted)
    mse_node = MSEWithSplitter(y_true_variable, y_predicted_variable)
    mse_actual = mse_node.forward()
    np.testing.assert_allclose(mse_actual, mse_desired)
def test_forward_backward(self):
    np.random.seed(42)
    y = np.random.rand(3, 2)
    y_variable = Variable(y)
    softmax_node = Softmax(y_variable)
    y_softmax_actual = softmax_node.forward()
    ey = np.exp(y)
    y_softmax_desired = (ey.T / np.sum(ey, axis=1)).T
    np.testing.assert_allclose(y_softmax_actual, y_softmax_desired)
    dl_dsoftmax = np.random.rand(3, 2)
    # Upstream gradient times the exponentials: dl_dsoftmax[i, j] * e^y[i, j]
    weighted_ey = dl_dsoftmax * ey
    # Row sums of the exponentials: sum_j e^y[i, j]
    ey_row_sum = ey.sum(axis=1)
    # Row sums of the weighted exponentials: sum_j dl_dsoftmax[i, j] * e^y[i, j]
    weighted_ey_row_sum = weighted_ey.sum(axis=1)
    # Squared row sums: (sum_j e^y[i, j])^2
    squared_ey_row_sum = np.square(ey_row_sum)
    # Quotient-rule expansion of the row-wise softmax Jacobian, element by element
    dl_dy_desired = np.array([
        [(weighted_ey[0, 0] * ey_row_sum[0] - ey[0, 0] * weighted_ey_row_sum[0]) / squared_ey_row_sum[0],
         (weighted_ey[0, 1] * ey_row_sum[0] - ey[0, 1] * weighted_ey_row_sum[0]) / squared_ey_row_sum[0]],
        [(weighted_ey[1, 0] * ey_row_sum[1] - ey[1, 0] * weighted_ey_row_sum[1]) / squared_ey_row_sum[1],
         (weighted_ey[1, 1] * ey_row_sum[1] - ey[1, 1] * weighted_ey_row_sum[1]) / squared_ey_row_sum[1]],
        [(weighted_ey[2, 0] * ey_row_sum[2] - ey[2, 0] * weighted_ey_row_sum[2]) / squared_ey_row_sum[2],
         (weighted_ey[2, 1] * ey_row_sum[2] - ey[2, 1] * weighted_ey_row_sum[2]) / squared_ey_row_sum[2]],
    ])
    softmax_node.backward(dl_dsoftmax)
    dl_dy_actual = y_variable.get_gradient()
    np.testing.assert_allclose(dl_dy_actual, dl_dy_desired)
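# The closed-form Jacobian above is easy to get wrong, so a numerical check is
# a useful complement. This is a minimal finite-difference sketch for the same
# Softmax node; test_numerical_softmax_gradient is a hypothetical name and the
# tolerances are assumptions, but Variable/Softmax/forward/backward/get_gradient
# are used exactly as in the tests above.
def test_numerical_softmax_gradient():
    np.random.seed(0)
    y = np.random.rand(3, 2)
    dl_dsoftmax = np.random.rand(3, 2)
    eps = 1e-6

    # Analytic gradient from the graph.
    y_variable = Variable(y)
    node = Softmax(y_variable)
    node.forward()
    node.backward(dl_dsoftmax)
    analytic = y_variable.get_gradient()

    # Central finite differences of l(y) = sum(dl_dsoftmax * softmax(y)).
    numeric = np.zeros_like(y)
    for index in np.ndindex(*y.shape):
        for sign, shift in ((+1, eps), (-1, -eps)):
            perturbed = y.copy()
            perturbed[index] += shift
            value = np.sum(dl_dsoftmax * Softmax(Variable(perturbed)).forward())
            numeric[index] += sign * value
        numeric[index] /= 2 * eps

    np.testing.assert_allclose(analytic, numeric, rtol=1e-4, atol=1e-6)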
def test_splitter_backward(self):
    y_true = np.array([[1], [2], [3], [4]])
    y_predicted = np.array([[8], [7], [6], [5]])
    mse_derivative_desired = 2.0 / y_true.shape[0] * (y_true - y_predicted)
    y_true_variable = Variable(y_true)
    y_predicted_variable = Variable(y_predicted)
    mse_node = MSEWithSplitter(y_true_variable, y_predicted_variable)
    mse_node.forward()
    mse_node.backward()
    np.testing.assert_allclose(y_true_variable.get_gradient(), mse_derivative_desired)
def test_relu():
    x = np.random.random(10) - 0.5
    x[1] = -2  # in case all positive
    v = Variable(x)
    relu = ReLU(v)
    expected = np.array(x)
    expected[x < 0] = 0
    np.testing.assert_allclose(relu.forward(), expected, rtol=1e-5)
    relu.backward(np.ones(10))
    np.testing.assert_equal(v.get_gradient(), np.sign(expected))
def test_reduce_size(self):
    x = np.random.rand(5, 3)
    v = Variable(x)
    rs_full = ReduceSize(v)
    rs_rows = ReduceSize(v, 0)
    rs_cols = ReduceSize(v, 1)
    np.testing.assert_equal(rs_full.forward(), 15)
    np.testing.assert_equal(rs_rows.forward(), 5)
    np.testing.assert_equal(rs_cols.forward(), 3)
    grad_before = v.get_gradient()
    rs_full.backward(np.random.rand(5, 3))
    np.testing.assert_equal(v.get_gradient(), grad_before)
def test_reduce_sum(self):
    x = np.random.rand(5, 3)
    v1, v2 = Variable(x), Variable(x)
    rs_rows = ReduceSum(v1, 0)
    rs_cols = ReduceSum(v2, 1)
    np.testing.assert_allclose(rs_rows.forward(), np.sum(x, 0))
    np.testing.assert_allclose(rs_cols.forward(), np.sum(x, 1))
    grad_rows = np.random.rand(3,)
    rs_rows.backward(grad_rows)
    np.testing.assert_allclose(v1.get_gradient(), grad_rows * np.ones((5, 3)))
    grad_cols = np.random.rand(5,)
    rs_cols.backward(grad_cols)
    np.testing.assert_allclose(v2.get_gradient(), (grad_cols * np.ones((5, 3)).T).T)
def __init__(self, architecture: list, loss, weight_decay=0.0):
    weight_decay = np.float64(weight_decay)
    self._x_variable = Variable(None)  # TODO: call it placeholder
    self._y_variable = Variable(None)
    self._architecture, self._prediction_variable, regularization_cost = \
        mydnn._build_architecture_get_prediction_and_regularization_cost(
            architecture, weight_decay, self._x_variable)
    loss_class = loss_name_to_class[loss]
    self._is_classification = loss_class == CrossEntropy
    self._loss_variable = Add(
        loss_class(self._y_variable, self._prediction_variable),
        regularization_cost)
def __init__(self, label: GraphNode, predicted: GraphNode):
    super().__init__(label, predicted)
    diff = Add(predicted, HadamardMult(Variable(-1.0), label))
    splitter = Splitter(diff, 2)
    square = HadamardMult(splitter, splitter)
    mse = ReduceMean(square, 0)
    self._node = mse
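# For reference, the graph above composes to a per-column MSE: subtract,
# square element-wise, then average over axis 0 (the Splitter presumably lets
# both operands of the square share one upstream gradient). A minimal numpy
# sketch of the same computation; reference_mse is a hypothetical helper, not
# part of the library, and np is numpy as imported elsewhere in the module.
def reference_mse(label, predicted):
    diff = predicted - label
    return np.mean(diff * diff, axis=0)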
def test_reduce_sum_merged(self):
    # Matrix
    x = np.array([[1, 2, 3], [11, 12, 13]])
    v = Variable(x)
    rs = ReduceSum(v, 1)
    np.testing.assert_allclose(rs.forward(), np.array([6, 36]), rtol=1e-5)
    rs2 = ReduceSum(v, 0)
    np.testing.assert_allclose(rs2.forward(), np.array([12, 14, 16]), rtol=1e-5)
    op_sum = ReduceSum(ReduceSum(v, 0), 0)
    np.testing.assert_allclose(op_sum.forward(), np.sum(x), rtol=1e-5)
    # Array
    y = np.array([-0.5, 1, 2.5])
    v2 = Variable(y)
    r = ReduceSum(v2, 0)
    np.testing.assert_allclose(r.forward(), 3.0, rtol=1e-5)
    r.backward(1)
    np.testing.assert_equal(v2.get_gradient(), [1, 1, 1])
def test_reduce_mean_forward_backward(self):
    x = np.array([[1.0], [2.0], [3.0], [4.0]])
    x_variable = Variable(x)
    reduce_mean_x_node = ReduceMean(x_variable, axis=0)
    reduce_mean_x_actual = reduce_mean_x_node.forward()
    reduce_mean_x_desired = x.mean()
    np.testing.assert_allclose(reduce_mean_x_actual, reduce_mean_x_desired)
    reduce_mean_x_node.backward(grad=np.array([5.0]))
    dl_dx_actual = x_variable.get_gradient()
    dl_dx_desired = np.full(x.shape, 5.0 / x.shape[0])
    np.testing.assert_allclose(dl_dx_actual, dl_dx_desired)
def test_square(self):
    x = np.array([[1], [2], [3]])
    x_variable = Variable(x)
    x2_node = HadamardMult(x_variable, x_variable)
    x2_actual = x2_node.forward()
    x2_desired = x * x
    np.testing.assert_almost_equal(x2_actual, x2_desired)
    dl_dx2 = np.array([[4], [5], [6]])
    x2_node.backward(dl_dx2)
    dl_dx_desired = 2.0 * dl_dx2 * x
    dl_dx_actual = x_variable.get_gradient()
    np.testing.assert_almost_equal(dl_dx_actual, dl_dx_desired)
def test_forward_backward(self):
    y = np.arange(1, 7).reshape(3, 2)
    y_variable = Variable(y)
    log_node = Log(y_variable)
    y_log_actual = log_node.forward()
    y_log_desired = np.log(y)
    np.testing.assert_allclose(y_log_actual, y_log_desired)
    dl_dlogy = np.arange(7, 13).reshape(3, 2)
    log_node.backward(dl_dlogy)
    dl_dy_actual = y_variable.get_gradient()
    dl_dy_desired = dl_dlogy / y
    np.testing.assert_allclose(dl_dy_actual, dl_dy_desired)
def test_forward(self):
    w = np.array([[1, 2, 3, 4],
                  [5, 6, 7, 8],
                  [9, 10, 11, 12]])
    x = np.array([[13, 14],
                  [15, 16],
                  [17, 18],
                  [19, 20]])
    wx_desired = np.array([[170, 180],
                           [426, 452],
                           [682, 724]])
    w_variable = Variable(w)
    x_variable = Variable(x)
    wx_variable = Multiply(w_variable, x_variable)
    wx_actual = wx_variable.forward()
    np.testing.assert_allclose(wx_actual, wx_desired)
def test_backward_1(self):
    w = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    x = np.array([[9, 10, 11], [12, 13, 14]])
    dl_dwx = np.ones((w.shape[0], x.shape[1]))
    w_variable = Variable(w)
    x_variable = Variable(x)
    wx_variable = Multiply(w_variable, x_variable)
    wx_desired = w @ x
    wx_actual = wx_variable.forward()
    np.testing.assert_allclose(wx_actual, wx_desired)
    wx_variable.backward(grad=dl_dwx)
    dl_dx_actual = x_variable.get_gradient()
    dl_dx_desired = np.array([[16, 16, 16], [20, 20, 20]])
    self.assertEqual(dl_dx_desired.shape, dl_dx_actual.shape)
    np.testing.assert_allclose(dl_dx_actual, dl_dx_desired)
def test_forward_backward(self):
    left = np.array([[1], [2], [3], [4]])
    right = np.array([[5], [6], [7], [8]])
    left_variable = Variable(left)
    right_variable = Variable(right)
    left_right_node = HadamardMult(left_variable, right_variable)
    left_right_actual = left_right_node.forward()
    left_right_expected = left * right
    np.testing.assert_allclose(left_right_actual, left_right_expected)
    dl_d_left_right = np.array([[9], [10], [11], [12]])
    left_right_node.backward(dl_d_left_right)
    d_l_d_left_actual = left_variable.get_gradient()
    d_l_d_right_actual = right_variable.get_gradient()
    d_l_d_left_desired = dl_d_left_right * right
    d_l_d_right_desired = dl_d_left_right * left
    np.testing.assert_allclose(d_l_d_left_actual, d_l_d_left_desired)
    np.testing.assert_allclose(d_l_d_right_actual, d_l_d_right_desired)
def test_vector(self):
    left = np.full((4, 1), -1)
    right = np.array([[1], [2], [3], [4]])
    left_variable = Variable(left)
    right_variable = Variable(right)
    left_right_node = HadamardMult(left_variable, right_variable)
    left_right_actual = left_right_node.forward()
    left_right_desired = np.array([[-1], [-2], [-3], [-4]])
    np.testing.assert_allclose(left_right_actual, left_right_desired)
    dl_d_left_right = np.array([[5], [6], [7], [8]])
    left_right_node.backward(dl_d_left_right)
    dl_dleft_actual = left_variable.get_gradient()
    dl_dright_actual = right_variable.get_gradient()
    dl_dleft_desired = np.array([[1 * 5], [2 * 6], [3 * 7], [4 * 8]])
    dl_dright_desired = np.array([[-5], [-6], [-7], [-8]])
    np.testing.assert_allclose(dl_dleft_actual, dl_dleft_desired)
    np.testing.assert_allclose(dl_dright_actual, dl_dright_desired)
class FullyConnectedLayer(GraphNode):
    def __init__(self, inputs_num: int, outputs_num: int,
                 activation_function: ActivationFunction.__class__,
                 input_variable=None):
        super().__init__()
        self._af = activation_function
        self._w = Variable(
            np.random.uniform(-1 / math.sqrt(inputs_num),
                              1 / math.sqrt(inputs_num),
                              (inputs_num, outputs_num)))
        self._b = Variable(np.zeros(outputs_num))
        self._input = input_variable
        self._output = self._af(Add(Multiply(self._input, self._w), self._b))

    def forward(self):
        return self._output.forward()

    def backward(self, grads=None):
        self._output.backward(grads)

    def reset(self):
        self._output.reset()

    def get_value(self):
        return self._output.get_value()

    def update_grad(self, learning_rate):
        # param_scale = np.linalg.norm(self._w.get_value())
        # update_scale = np.linalg.norm(-learning_rate * self._w.get_gradient())
        # logger.info('Update magnitude is %f (desired is about %f)', update_scale / param_scale, 1e-3)
        self._w.update_grad(learning_rate)
        self._b.update_grad(learning_rate)

    def get_weight(self):
        return self._w
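# A minimal usage sketch of the layer above: one ReLU layer on a small batch,
# followed by a single gradient step. The upstream gradient of ones, the batch
# shape, and the learning rate are arbitrary illustration values;
# example_fully_connected_step is a hypothetical helper, while Variable,
# FullyConnectedLayer, ReLU, backward and update_grad are used as defined in
# this module and its tests.
def example_fully_connected_step():
    x_variable = Variable(np.random.rand(4, 3))        # batch of 4 samples, 3 features
    layer = FullyConnectedLayer(3, 2, ReLU, x_variable)
    activations = layer.forward()                      # shape (4, 2)
    layer.backward(np.ones_like(activations))          # pretend dL/d(output) = 1
    layer.update_grad(0.01)                            # SGD-style parameter update
    return activations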
def test_backward_2(self):
    w = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    x = np.array([[9, 10, 11], [12, 13, 14]])
    dl_dwx = np.arange(1, 1 + w.shape[0] * x.shape[1]).reshape(w.shape[0], x.shape[1])
    w_variable = Variable(w)
    x_variable = Variable(x)
    wx_variable = Multiply(w_variable, x_variable)
    wx_desired = w @ x
    wx_actual = wx_variable.forward()
    np.testing.assert_allclose(wx_actual, wx_desired)
    wx_variable.backward(grad=dl_dwx)
    dl_dw_actual = w_variable.get_gradient()
    dl_dw_desired = np.array([[1 * 9 + 2 * 10 + 3 * 11, 1 * 12 + 2 * 13 + 3 * 14],
                              [4 * 9 + 5 * 10 + 6 * 11, 4 * 12 + 5 * 13 + 6 * 14],
                              [7 * 9 + 8 * 10 + 9 * 11, 7 * 12 + 8 * 13 + 9 * 14],
                              [10 * 9 + 11 * 10 + 12 * 11, 10 * 12 + 11 * 13 + 12 * 14]])
    self.assertEqual(dl_dw_desired.shape, dl_dw_actual.shape)
    np.testing.assert_allclose(dl_dw_actual, dl_dw_desired)
def test_forward(self):
    x = np.array([[1, 2], [3, 4], [5, 6]])
    b = np.array([[7, 8], [9, 10], [11, 12]])
    dl_dxb = np.array([[13, 14], [15, 16], [17, 18]])
    x_variable = Variable(x)
    b_variable = Variable(b)
    wx_variable = Add(x_variable, b_variable)
    wx_variable.forward()
    wx_variable.backward(dl_dxb)
    dl_dx_actual = x_variable.get_gradient()
    dl_db_actual = b_variable.get_gradient()
    np.testing.assert_allclose(dl_dx_actual, dl_dxb)
    np.testing.assert_allclose(dl_db_actual, dl_dxb)