def test_full_sigmoid_node(self):
    w_node = node.VarNode('w', True)
    x_node = node.VarNode('x')
    ya_node = node.VarNode('y_a')
    b_node = node.VarNode('b', True)
    start_nodes = [w_node, x_node, b_node, ya_node]

    w = np.array([[1, 3, 0], [0, 1, -1]])
    x = np.array([[1, -1, 2]]).T
    b = np.array([[-2, -3]]).T
    y_act = np.array([[.5, .7]]).T
    var_map = {'w': w, 'x': x, 'y_a': y_act, 'b': b}

    wx_node = node.MatrixMultiplication(w_node, x_node)
    sum_node = node.MatrixAddition(wx_node, b_node)
    sigmoid_node = SigmoidNode(sum_node)
    l2_node = L2DistanceSquaredNorm(sigmoid_node, ya_node)

    optim_func = self.rate_adjustable_optimizer_func(0.01)
    optimizer = core.np.Optimization.OptimizerIterator(start_nodes, l2_node, optim_func)
    log_at_info()
    losses = []
    for i in range(100):
        loss = optimizer.step(var_map, 1.0)
        losses.append(loss)
        if i % 10 == 0:
            print("[{}] Loss:{}".format(i, loss))
    print("Final loss:{}".format(loss))
    print("w:{}".format(var_map['w']))
    print("b:{}".format(var_map['b']))
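# `rate_adjustable_optimizer_func` is defined elsewhere in these test classes.
# A minimal sketch of the assumed behavior (plain SGD with a configurable
# learning rate): the closure signature mirrors the inline `optimizer_function`
# in test_convolution_small, and the w - lr * grad update is consistent with
# the weights asserted in test_basic_op.
def rate_adjustable_optimizer_func(self, lr):
    def optimizer_function(w, grad, local_node_storage={}):
        # Vanilla gradient descent step: w <- w - lr * grad
        return w - lr * grad

    return optimizer_function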
def test_train(self):
    num_iter = 100000
    x_node = n.VarNode('x')
    y_target_node = n.VarNode('y_target')
    rnn_node = rnn.SimpleRnnLayer(x_node, self.name_ds.n_categories, 15)
    loss_node = loss.SoftmaxCrossEntropy(rnn_node, y_target_node)
    all_losses = []
    # optimizer_func = autodiff_optim.AdamOptimizer()
    optimizer_func = autodiff_optim.SGDOptimizer(lr=0.0001)
    optimizer = autodiff_optim.OptimizerIterator([x_node, y_target_node], loss_node, optimizer_func)
    ctx = n.ComputeContext({'x': "", 'y_target': ""})
    log_at_info()
    every = 500
    t = time.time()
    for i in range(1, num_iter + 1):
        rnn_node.set_initial_state_to_zero()
        c, l, category_index, name_tensor = self.name_ds.random_training_example()
        cat_tensor = self.name_ds.category_idx_to_tensor([category_index])
        ctx['x'] = name_tensor
        ctx['y_target'] = cat_tensor
        ctx['i'] = i
        loss_value = optimizer.step(ctx, 1.0)
        all_losses.append(loss_value)
        if i % every == 0:
            t = time.time() - t
            recent_losses = all_losses[-every:]
            av = np.average(recent_losses)
            info("[{:06d}] Avg. loss = {:10.6f}"
                 " | {:04.2f}s per {} | Total iters set to:{}".format(i, av, t, every, num_iter))
            all_losses = []
            t = time.time()
def test_linear_transformation(self):
    np.random.seed(100)
    w_node = node.VarNode('w')
    x_node = node.VarNode('x')
    ya_node = node.VarNode('y_a')
    w = np.array([[1, 2, 1], [2, 0, -1]])
    x = np.array([[0.54340494, 0.27836939, 0.42451759, 0.84477613, 0.00471886],
                  [0.12156912, 0.67074908, 0.82585276, 0.13670659, 0.57509333],
                  [0.89132195, 0.20920212, 0.18532822, 0.10837689, 0.21969749]])
    y_act = np.array([[0.97862378, 0.81168315, 0.17194101, 0.81622475, 0.27407375],
                      [0.43170418, 0.94002982, 0.81764938, 0.33611195, 0.17541045]])
    info("Printing x...")
    info(x)
    info("Printing y_act...")
    info(y_act)
    var_map = {'w': w, 'x': x, 'y_a': y_act}

    wx_node = node.MatrixMultiplication(w_node, x_node)
    l2_node = L2DistanceSquaredNorm(wx_node, ya_node)
    log_at_info()
    w_node.forward(var_map)
    x_node.forward(var_map)
    ya_node.forward(var_map)
    l2_node.backward(1.0, self, var_map)
    info(wx_node.value())
    info("grad...")
    info(wx_node.total_incoming_gradient())
def test_sigmoid(self):
    r"""
    See TestActivations.Sigmoid.ipynb for the corresponding PyTorch calculations.
    :return:
    """
    x = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3]])
    x_node = node.VarNode('x')
    target = np.zeros(x.shape)
    target_node = node.VarNode('target')
    var_map = {'x': x, 'target': target}
    sigmoid = SigmoidNode(x_node)
    l2loss = L2DistanceSquaredNorm(sigmoid, target_node)
    x_node.forward(var_map)
    target_node.forward(var_map)

    value = sigmoid.value()
    expected_value = np.array([[0.73105858, 0.88079708, 0.95257413, 0.98201379],
                               [0.95257413, 0.98201379, 0.99330715, 0.99752738],
                               [0.26894142, 0.5, 0.73105858, 0.95257413]])
    np.testing.assert_almost_equal(expected_value, value)
    loss = l2loss.value()
    info("L2 Loss:{}".format(loss))
    log_at_info()

    l2loss.backward(1.0, self, var_map)
    x_grad = x_node.total_incoming_gradient()
    expected_x_grad = np.array([[0.28746968, 0.18495609, 0.08606823, 0.03469004],
                                [0.08606823, 0.03469004, 0.01320712, 0.00492082],
                                [0.10575419, 0.25, 0.28746968, 0.08606823]])
    info("-------------------------------------------------------------")
    info("x_grad          = np.{}".format(repr(x_grad)))
    info("x_grad_expected = np.{}".format(repr(expected_x_grad)))
    np.testing.assert_almost_equal(expected_x_grad, x_grad)
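# A hand-computed cross-check of the gradient asserted above (a sketch; the
# helper name is hypothetical). With target = 0, the expected values match an
# L2 node that passes 2 * (s - target) upstream, so the chain rule through
# sigmoid'(x) = s * (1 - s) gives dL/dx = 2 * s * s * (1 - s):
def manual_sigmoid_grad_check():
    x = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3]])
    s = 1.0 / (1.0 + np.exp(-x))          # sigmoid forward pass
    grad = 2.0 * s * (s * (1.0 - s))      # upstream 2*(s - 0) times sigmoid'
    expected_x_grad = np.array([[0.28746968, 0.18495609, 0.08606823, 0.03469004],
                                [0.08606823, 0.03469004, 0.01320712, 0.00492082],
                                [0.10575419, 0.25, 0.28746968, 0.08606823]])
    np.testing.assert_almost_equal(expected_x_grad, grad)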
def test_linear_fit(self):
    epochs = 2000
    iris = Iris()
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    dense = node.DenseLayer(x_node, 3)
    softmax = act.Softmax(dense)
    cross_entropy = loss.CrossEntropy(softmax, yt_node)
    optimizer_func = core.np.Optimization.AdamOptimizer(lr=0.001)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()
    epoch = 0
    ctx = node.ComputeContext()
    for x, y in iris.train_iterator(epochs, 8):
        ctx['x'], ctx['yt'] = x, y
        loss_now = optimizer.step(ctx, 1.0)
        if epoch % 100 == 0:
            info("[{}]\tloss_now = {}".format(epoch, loss_now))
        epoch += 1

    f = node.make_evaluator([x_node, yt_node], softmax)
    total, correct = 40, 0
    for x, y_actual in iris.test_iterator(total, one_hot=False):
        ctx['x'], ctx['yt'] = x, y_actual
        y_predicted = f.at(ctx)
        max_idx = np.argmax(y_predicted)
        if max_idx == y_actual:
            correct += 1
    percent = correct * 100 / total
    print("Correct= {}%".format(percent))
def test_linear_training_tf_fast(self):
    r"""
    For fastest results, use a batch size of 64, the Adam optimizer and 3 epochs.
    You should get more than 97% accuracy.
    :return:
    """
    # Build the network
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    linear1 = node.DenseLayer(x_node, 100, name="Dense-First")
    relu1 = act.RelUNode(linear1, name="RelU-First")
    linear2 = node.DenseLayer(relu1, 200, name="Dense-Second")
    relu2 = act.RelUNode(linear2, name="RelU-Second")
    linear3 = node.DenseLayer(relu2, 10, name="Dense-Third")
    cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")

    # Set up optimizers and params
    batch_size = 64
    epochs = 5  # use 25 for SGD
    optimizer_func = autodiff_optim.AdamOptimizer()
    # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
    optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)

    log_at_info()
    losses = []
    x_train, y_train, x_val, y_val, x_test, y_test = mn.load_dataset(flatten=True)
    iter_count = 1
    predictor = node.make_evaluator([x_node, yt_node], linear3)
    total_time = time.time()
    ctx = node.ComputeContext({})
    for epoch in range(epochs):
        epoch_time = time.time()
        for x, y in iterate_over_minibatches(x_train, y_train, batch_size=batch_size):
            ctx['x'], ctx['yt'] = x.T, to_one_hot(y, max_cat_num=9)
            iter_loss = optimizer.step(ctx, 1.0) / batch_size
            losses.append(iter_loss)
            iter_count += 1
        epoch_time = time.time() - epoch_time
        loss_av = np.mean(np.array(losses[:-batch_size + 1]))
        ctx['x'], ctx['yt'] = x_val.T, to_one_hot(y_val, max_cat_num=9)
        y_predicted = predictor(ctx)
        arg_max = np.argmax(y_predicted, axis=0)
        correct = arg_max == y_val
        percent = np.mean(correct) * 100
        info("Epoch {:2d}:: Validation "
             "accuracy:[{:5.2f}%] loss av={:01.8f}, time:{:2.3f}s".format(
                 epoch, percent, loss_av, epoch_time))
        self.assertTrue(percent > 95)
    total_time = time.time() - total_time
    info("[Mnist784DsTest.test_linear_training()] total_time = {:5.3f} s".format(total_time))
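# `to_one_hot` is imported from the MNIST helpers. A sketch of the assumed
# behavior, inferred from the usage above (hypothetical implementation):
# labels go in as a 1-D batch and come out as a (max_cat_num + 1) x batch_size
# one-hot matrix, matching the column-major layout used for x and the
# np.argmax(..., axis=0) readout.
def to_one_hot_sketch(y, max_cat_num):
    one_hot = np.zeros((max_cat_num + 1, len(y)))
    one_hot[y, np.arange(len(y))] = 1.0  # set the label row in each column
    return one_hot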
def test_multi_layer(self):
    r"""
    This actually performs better with SGD and normal initialization,
    reaching almost 99% accuracy.
    :return:
    """
    iris = Iris()
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    dense = node.DenseLayer(x_node, 16)
    tanh = act.TanhNode(dense)
    dense2 = node.DenseLayer(tanh, 10)
    relu = act.RelUNode(dense2)
    dense3 = node.DenseLayer(relu, 3)
    softmax = act.Softmax(dense3)
    cross_entropy = loss.CrossEntropy(softmax, yt_node)

    # optimizer_func = core.np.Optimization.AdamOptimizer()
    optimizer_func = core.np.Optimization.SGDOptimizer(lr=0.01)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()

    epoch = 0
    epochs = 10000
    batch_size = 8
    ctx = node.ComputeContext(weight_initializer=None)
    for x, y in iris.train_iterator(epochs, batch_size):
        ctx['x'], ctx['yt'] = x, y
        loss_now = optimizer.step(ctx, 1.0) / batch_size
        if epoch % 500 == 0:
            info("[{}]\tloss_now = {}".format(epoch, loss_now))
        epoch += 1

    f = node.make_evaluator([x_node, yt_node], softmax)
    total, correct = 100, 0
    for x, y_actual in iris.test_iterator(total, one_hot=False):
        var_map = {'x': x, 'yt': y_actual}
        y_predicted = f(var_map)
        max_idx = np.argmax(y_predicted)
        mark = 'x'
        if max_idx == y_actual:
            correct += 1
            mark = u'\u2713'
        print("X:{}, y_pred:{}, Actual={}, Predicted:{} {}".format(
            x.T, y_predicted.T, y_actual[0], max_idx, mark))
    percent = correct * 100 / total
    print("Correct= {}%".format(percent))
    self.assertTrue(percent > 95)
def test_basic_op(self):
    np.random.seed(100)
    # dtype=float: np.float was removed in NumPy >= 1.24
    x = np.array([[1, -1], [2, 3], [-1, -2]], dtype=float)
    y = np.array([[-1, 1], [-3, -1]], dtype=float)
    x_node = node.VarNode('x')
    y_target = node.VarNode('y_target')
    dense = node.DenseLayer(x_node, 2, self.model_w, self.model_b)
    l2_node = L2DistanceSquaredNorm(dense, y_target)
    var_map = {'x': x, 'y_target': y}
    x_node.forward(var_map)
    y_target.forward(var_map)

    log_at_info()
    value = dense.value()
    info("------------------------------------------")
    info("Predicted value = np.{}".format(repr(value)))
    info("Target value    = np.{}".format(repr(y)))
    value = l2_node.value()
    info("L2 node value (loss):{}".format(value))
    info("------------------------------------------")
    info("Printing weights (not updated yet)")
    info("------------------------------------------")
    info("Linear layer weight = np.{}".format(repr(dense.get_w())))
    info("Linear layer bias   = np.{}".format(repr(dense.get_b())))

    optim_func = self.rate_adjustable_optimizer_func(0.001)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, y_target], l2_node, optim_func)
    optimizer.step(var_map, 1.0)

    np.set_printoptions(precision=64, floatmode='maxprec_equal')
    info("------------------------------------------")
    info("Printing after updating weights")
    info("------------------------------------------")
    info("Linear layer weight:{}".format(repr(dense.get_w())))
    info("Linear layer bias:{}".format(repr(dense.get_b())))
    info("w_grad = np.{}".format(repr(dense.get_w_grad())))
    info("b_grad = np.{}".format(repr(dense.get_b_grad())))

    expected_weight = np.array([[1.0000, 2.9850, -0.9910],
                                [-0.0040, -3.9755, 1.9845]])
    expected_bias = np.array([[-3.006], [2.009]])
    expected_w_grad = np.array([[0.0, 15.0, -9.0],
                                [4.0, -24.5, 15.5]])
    expected_b_grad = np.array([[6.], [-9.]])
    np.testing.assert_almost_equal(expected_weight, dense.get_w())
    np.testing.assert_almost_equal(expected_w_grad, dense.get_w_grad())
    np.testing.assert_almost_equal(expected_bias, dense.get_b())
    np.testing.assert_almost_equal(expected_b_grad, dense.get_b_grad())
def test_convolution_with_l2(self):
    img = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3], [0, 2, -1, 4]])
    kernel = np.array([[1, -1], [0, 2]])
    y = np.ones((3, 3))
    img_node = node.VarNode('img')
    c2d = conv.Convolution2D(img_node, input_shape=(4, 4), kernel=kernel)
    target_node = node.VarNode('y')
    l2 = L2DistanceSquaredNorm(c2d, target_node)
    var_map = {'img': img, 'y': y}
    img_node.forward(var_map)
    target_node.forward(var_map)

    info("Original x into the convolution layer")
    info(repr(img))
    output_image = c2d.value()
    info("Output of the convolution layer")
    expected_output = np.array([[7., 9., 11.], [-1., 1., 5.], [3., -3., 6.]])
    np.testing.assert_array_almost_equal(expected_output, output_image)
    info(repr(output_image))

    log_at_info()
    info("Kernel before gradient descent")
    info(repr(c2d.get_kernel()))
    optim_func = self.rate_adjustable_optimizer_func(0.001)
    optimizer = core.np.Optimization.OptimizerIterator([img_node, target_node], l2, optim_func)
    loss = optimizer.step(var_map, 1.0)
    info("Took a single gradient descent step - calculated weights and updated gradients")
    info("<<<<Printing loss matrix after single step>>>>")
    info(repr(loss))
    info("Printing kernel:")
    info(repr(c2d.get_kernel()))
    info("--------------------------------------")
    info("Printing kernel gradient:")
    info(repr(c2d.get_kernel_grad()))
    info("-------------------------")
    info("Bias :{}".format(c2d.get_bias()))
    info("Bias gradient :{}".format(c2d.get_bias_grad()))

    expected_kernel = np.array([[0.98466667, -1.02288889], [-0.02066667, 1.96355556]])
    np.testing.assert_array_almost_equal(expected_kernel, c2d.get_kernel())
    expected_kernel_grad = np.array([[15.33333333, 22.88888889], [20.66666667, 36.44444444]])
    np.testing.assert_array_almost_equal(expected_kernel_grad, c2d.get_kernel_grad())
    expected_bias = -0.0064444444444444445
    expected_bias_grad = 6.444444444444445
    np.testing.assert_almost_equal(expected_bias, c2d.get_bias())
    np.testing.assert_almost_equal(expected_bias_grad, c2d.get_bias_grad())
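# A numpy cross-check of the expected gradients above (a sketch; the helper
# name is hypothetical). The asserted numbers match an L2 node that passes
# 2 * (O - y) upstream while Convolution2D averages its kernel and bias
# gradients over the 9 output positions; test_convolution_small, where the
# upstream gradient is explicitly all ones, shows the same averaging.
def manual_conv_grad_check():
    img = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3], [0, 2, -1, 4]], dtype=float)
    kernel = np.array([[1, -1], [0, 2]], dtype=float)
    y = np.ones((3, 3))
    # Valid 2D cross-correlation of the 4x4 image with the 2x2 kernel
    out = np.array([[np.sum(kernel * img[i:i + 2, j:j + 2]) for j in range(3)]
                    for i in range(3)])
    g_out = 2.0 * (out - y)  # upstream gradient from the L2 node
    # Kernel gradient: correlate the upstream gradient with the input windows,
    # averaged over the 9 output positions; bias gradient: mean of g_out
    k_grad = np.array([[np.mean(g_out * img[m:m + 3, n:n + 3]) for n in range(2)]
                       for m in range(2)])
    b_grad = np.mean(g_out)
    np.testing.assert_array_almost_equal(
        np.array([[15.33333333, 22.88888889], [20.66666667, 36.44444444]]), k_grad)
    np.testing.assert_almost_equal(6.444444444444445, b_grad)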
def run_model(self, model, optimizer_func, epochs):
    var_map, start_nodes, l2_node = models.make__two_layer_model()
    optimizer = core.np.Optimization.OptimizerIterator(start_nodes, l2_node, optimizer_func)
    log_at_info()

    count = 0
    losses = []
    sum_losses = 0
    av = []
    x_axis = []
    for (x, y) in model.data(epochs, 2):
        # print("count:{}".format(count))
        var_map['x'] = x
        var_map['y_a'] = y
        loss = optimizer.step(var_map, 1.0)
        losses.append(loss)
        x_axis.append(count)
        sum_losses += loss
        if count % 500 == 0:
            last_100 = losses[-100:]
            average_l100 = sum(last_100) / len(last_100)
            av.append([count, average_l100])
            print("[{}] Current loss:{} Average loss (last 100):{}".format(
                count, loss, average_l100))
        count += 1

    last_100 = losses[-100:]
    average_l100 = sum(last_100) / len(last_100)
    av.append([count, average_l100])
    info("Now printing w and b ..W:")
    info(var_map['w'])
    info("-------------b:")
    info(var_map['b'])
    info("---- print w2 and b2... W2:")
    info(var_map['w2'])
    info("----- b2 ----")
    info(var_map['b2'])
    info("[{}] Current loss:{} Average loss (last 100):{}".format(count, loss, average_l100))
def test_rnn_layer_with_loss(self):
    debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] self.data_dir = {}".format(self.data_dir))
    x = self.name_ds.line_to_numpy('ABCD')
    debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] ABCD: x = np.{}".format(repr(x)))
    debug("------------------------------------------------------")
    x = self.name_ds.line_to_numpy('Albert')
    debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] x = np.{}".format(repr(x)))
    debug("------------------------------------------------------")
    log_at_info()

    # Sample a few training examples; the last one is used below
    for i in range(5):
        c, l, category_index, name_tensor = self.name_ds.random_training_example()
        debug("[{}]:{}".format(c, l))
    cat_tensor = self.name_ds.category_idx_to_tensor([category_index])
    debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] cat_tensor = np.{}".format(repr(cat_tensor)))

    x_node = n.VarNode('x')
    y_target_node = n.VarNode('y_target')
    ctx = n.ComputeContext({'x': name_tensor, 'y_target': cat_tensor})
    rnn_node = rnn.SimpleRnnLayer(x_node, self.name_ds.n_categories, 128)
    loss_node = loss.LogitsCrossEntropy(rnn_node, y_target_node)
    x_node.forward(ctx)
    y_target_node.forward(ctx)
    y = rnn_node.value()
    info("[RnnLayerFullTests.test_rnn_layer_with_loss()] y = np.{}".format(repr(y)))
    loss_value = loss_node.value()
    info("[RnnLayerFullTests.test_rnn_layer_with_loss()] loss = np.{}".format(repr(loss_value)))
    loss_node.backward(1.0, self, ctx)
    grads = rnn_node.total_incoming_gradient()
    info(grads)
def do_linear_optimization(self, optim_func, epochs=25000, batch_size=8, do_assert=True):
    np.random.seed(100)
    x_node = node.VarNode('x')
    y_node = node.VarNode('y')

    net_w = np.array([[-1, -3, 1], [0, 4, -2]])
    net_b = np.array([3, -2]).reshape((2, 1))
    dense = node.DenseLayer(x_node, 2, net_w, net_b)
    l2_node = L2DistanceSquaredNorm(dense, y_node)

    # optim_func = self.rate_adjustable_optimizer_func(0.01)
    # adam = core.np.Optimization.AdamOptimizer()
    optimizer = core.np.Optimization.OptimizerIterator([x_node, y_node], l2_node, optim_func)
    log_at_info()
    epoch = 0
    losses = []
    for x, y in self.model.data(epochs, batch_size):
        var_map = {'x': x, 'y': y}
        loss = optimizer.step(var_map, 1.0)
        # losses.append(loss)
        if epoch % 100 == 0:
            losses.append([epoch, loss])
        if epoch % 1000 == 0:
            info("[{}] Loss:{}".format(epoch, loss))
        epoch += 1

    info("[{}] Loss:{}".format(epoch, loss))
    dense_w = dense.get_w()
    dense_b = dense.get_b()
    info("w = np.{}".format(repr(dense_w)))
    info("b = np.{}".format(repr(dense_b)))
    if do_assert:
        np.testing.assert_array_almost_equal(dense_w, self.model_w, 3)
        np.testing.assert_array_almost_equal(dense_b, self.model_b, 3)
    return np.array(losses)
def test_network_optimizer(self):
    w_node = node.VarNode('w', True)
    x_node = node.VarNode('x')
    ya_node = node.VarNode('y_a')
    b_node = node.VarNode('b', True)
    start_nodes = [w_node, x_node, b_node, ya_node]

    w = np.array([[1, 3, 0], [0, 1, -1]])
    x = np.array([[1, -1, 2]]).T
    b = np.array([[-2, -3]]).T
    y_act = np.array([[1, 2]]).T
    var_map = {'w': w, 'x': x, 'y_a': y_act, 'b': b}

    wx_node = node.MatrixMultiplication(w_node, x_node)
    sum_node = node.MatrixAddition(wx_node, b_node)
    l2_node = L2DistanceSquaredNorm(sum_node, ya_node)
    optimizer = optim.OptimizerIterator(start_nodes, l2_node)
    log_at_info()
    for _ in range(500):
        loss = optimizer.step(var_map, 1.0)
    info("Final loss:{}".format(loss))
    self.assertTrue(math.fabs(loss) < 1e-25)
def test_convolution_small(self):
    img = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3], [0, 2, -1, 4]])
    kernel = np.array([[1, -1], [0, 2]])
    img_node = node.VarNode('img')
    c2d = conv.Convolution2D(img_node, input_shape=(4, 4), kernel=kernel)
    var_map = {'img': img}
    img_node.forward(var_map)

    info("Original x into the convolution layer")
    info(repr(img))
    output_image = c2d.value()
    info("Output of the convolution layer")
    expected_output = np.array([[7., 9., 11.], [-1., 1., 5.], [3., -3., 6.]])
    np.testing.assert_array_almost_equal(expected_output, output_image)
    info(repr(output_image))
    log_at_info()

    c2d.backward(output_image * 0.1, self, var_map)
    info("Kernel before gradient descent")
    info(repr(c2d.get_kernel()))

    def optimizer_function(_w, grad, local_node_storage={}):
        return _w - 0.001 * grad

    optimizer = core.np.Optimization.OptimizerIterator([img_node], c2d, optimizer_function)
    loss = optimizer.step(var_map, np.ones_like(output_image))
    info("Printing loss matrix - not really loss but just the output of the last node")
    info(repr(loss))
    info("Printing kernel after gradient descent")
    info(repr(c2d.get_kernel()))
    expected_kernel = np.array([[0.998, -1.003111], [-1.444444444e-3, 1.9973333]])
    info("kernel gradient:{}".format(repr(c2d.kernel_grad)))
    np.testing.assert_array_almost_equal(expected_kernel, c2d.get_kernel())
    self.assertAlmostEqual(-0.001, c2d.get_bias())
    info("Bias after gradient descent:{}".format(c2d.get_bias()))
    info("Gradient of bias :{}".format(c2d.bias_grad))
def test_linear_training(self):
    r"""
    For fastest results, use a batch size of 64, the Adam optimizer and 3 epochs.
    You should get more than 97% accuracy.
    :return:
    """
    # Build the network
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    linear1 = node.DenseLayer(x_node, 100, name="Dense-First", weight_scale=0.01)
    relu1 = act.RelUNode(linear1, name="RelU-First")
    linear2 = node.DenseLayer(relu1, 200, name="Dense-Second", weight_scale=0.01)
    relu2 = act.RelUNode(linear2, name="RelU-Second")
    linear3 = node.DenseLayer(relu2, 10, name="Dense-Third", weight_scale=0.01)
    cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")

    # Set up optimizers and params
    batch_size = 64
    epochs = 3
    optimizer_func = autodiff_optim.AdamOptimizer()
    # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
    optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)

    log_at_info()
    losses = []
    iter_count = 1
    predictor = node.make_evaluator([x_node, yt_node], linear3)
    ctx = node.ComputeContext({})
    mnist = Mnist784()
    total_time = time.time()
    for epoch in range(epochs):
        epoch_time = time.time()
        iter = 0
        for x, y in mnist.train_iterator_seq(batch_size=batch_size):
            ctx['x'], ctx['yt'] = x, y
            iter_loss = optimizer.step(ctx, 1.0) / batch_size
            losses.append(iter_loss)
            iter += 1
            if iter % 100 == 0:
                print("iter:{}".format(iter))
        loss_av = np.mean(np.array(losses[:-batch_size + 1]))
        e, xv, yv = mnist.test_iterator(1, batch_size=-1, one_hot=False)
        ctx['x'], ctx['yt'] = xv, yv
        percent = self.measure_validation_perf(predictor, ctx, yv)
        epoch_time = time.time() - epoch_time
        info("Iter {:2d}:: Val:{:2.4f}% , loss av={:01.8f}, time:{:2.3f}s".format(
            epoch, percent, loss_av, epoch_time))
    total_time = time.time() - total_time
    info("Total time taken:{:4.4f}".format(total_time))
def test_basic_op_large_matrix(self):
    r"""
    Runs the test for a slightly larger matrix.
    :return:
    """
    x = np.array([[0.54566752, 0.66921034, 0.35265542, 0.32324271, 0.35036963, 0.05317591],
                  [0.97433629, 0.5027976, 0.15637831, 0.72948084, 0.42097552, 0.52522781],
                  [0.41793729, 0.48112345, 0.46862087, 0.88918467, 0.48792933, 0.32439625],
                  [0.4775774, 0.58105899, 0.35079832, 0.79657794, 0.3910011, 0.72908915]])
    w = np.array([[0.61013274, 0.86914947, 0.95211922, 0.96385655],
                  [0.64290252, 0.2717017, 0.193146, 0.05004571],
                  [0.14360354, 0.54256991, 0.90870491, 0.06577582]])
    b = np.array([[0.76026806], [0.32982798], [0.01258297]])
    pred = w @ x + b
    target = np.ones_like(pred)

    x_node = node.VarNode('x')
    target_node = node.VarNode('y_target')
    dense = node.DenseLayer(x_node, 3, w, b)
    l2_node = L2DistanceSquaredNorm(dense, target_node)
    var_map = {'x': x, 'y_target': target}
    x_node.forward(var_map)
    target_node.forward(var_map)

    log_at_info()
    predicted = dense.value()
    info("------------------------------------------")
    info("Predicted value = np.{}".format(repr(predicted)))
    info("Target value    = np.{}".format(repr(target)))
    loss = l2_node.value()
    info("L2 node value (loss):{}".format(loss))
    info("------------------------------------------")
    info("Printing weights (not updated yet)")
    info("------------------------------------------")
    info("Linear layer weight = np.{}".format(repr(dense.get_w())))
    info("Linear layer bias   = np.{}".format(repr(dense.get_b())))

    optim_func = self.rate_adjustable_optimizer_func(0.001)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, target_node], l2_node, optim_func)
    optimizer.step(var_map, 1.0)

    np.set_printoptions(precision=64, floatmode='maxprec_equal')
    info("------------------------------------------")
    info("Printing after updating weights")
    info("------------------------------------------")
    info("weight=np.{}".format(repr(dense.get_w())))
    info("w_grad = np.{}".format(repr(dense.get_w_grad())))
    info("bias = np.{}".format(repr(dense.get_b())))
    info("b_grad = np.{}".format(repr(dense.get_b_grad())))

    # These are values from PyTorch
    expected_weight = np.array([[0.60973525, 0.86854088, 0.95157486, 0.96327269],
                                [0.64292222, 0.27173772, 0.19318908, 0.05009926],
                                [0.14362818, 0.54258782, 0.90872669, 0.06581017]])
    expected_w_grad = np.array([[0.39752683, 0.60859025, 0.54437733, 0.58387089],
                                [-0.01970989, -0.03603142, -0.04307830, -0.05355303],
                                [-0.02465229, -0.01786957, -0.02174304, -0.03434603]])
    expected_bias = np.array([[0.75927186, 0.32992661, 0.01267095]]).T
    expected_b_grad = np.array([[0.99619532, -0.09862594, -0.08797690]]).T
    np.testing.assert_almost_equal(expected_weight, dense.get_w())
    np.testing.assert_almost_equal(expected_w_grad, dense.get_w_grad())
    np.testing.assert_almost_equal(expected_bias, dense.get_b())
    np.testing.assert_almost_equal(expected_b_grad, dense.get_b_grad())