def test_gradient_finite_differences(self):
    """Check analytic gradients against finite-difference approximations.

    Iterates over every (layer type, activation function) combination from
    the fixture, runs ``check_gradient`` on a freshly built network, and
    asserts that every relative error stays below 1e-4.  On failure the
    per-parameter gradient views are dumped to stdout for debugging.
    """
    check_errors = []
    for l, a in itertools.product(self.layer_types,
                                  self.activation_functions):
        net = self.build_network(l, a)
        e, grad_calc, grad_approx = check_gradient(
            net, n_batches=5, n_timesteps=7, rnd=rnd)
        check_errors.append(e)
        if e > 1e-4:
            # Error too large: construct a weight view and break the
            # gradient down per named parameter block.
            # list(...) keeps this working on Python 3, where dict views
            # are not subscriptable.
            layer = list(net.layers.values())[1]  # the only hidden layer

            def dump(header, grad):
                # Print every named parameter view of the given gradient.
                print(header)
                view = layer.create_param_view(Matrix(grad))
                for n, q in view.items():
                    print("====== %s ======" % n)
                    print(q)

            dump('$$$$$$$$$$$$ approx $$$$$$$$$$$', grad_approx)
            dump('$$$$$$$$$$$$ calc $$$$$$$$$$$', grad_calc)
            dump('$$$$$$$$$$$$ diff $$$$$$$$$$$', grad_approx - grad_calc)
            print('\n')
        print("Checking Gradient of %s with %s = %0.4f" % (l(3), a, e))
    self.assertTrue(np.all(np.array(check_errors) < 1e-4))
def test_gradient_forked_architecture(self):
    """Gradient check for a network whose input fans out into two branches.

    Builds ``input >> A(3) >> out`` and ``input >> B(2) >> out``, so the
    output layer receives (and must correctly back-propagate through) two
    concatenated inbound connections.  Asserts the finite-difference error
    stays below 1e-4; dumps the per-parameter difference on failure.
    """
    check_errors = []
    in_layer = InputLayer(self.input_size)
    out_layer = ForwardLayer(self.output_size)
    in_layer >> ForwardLayer(3, name='A') >> out_layer
    in_layer >> ForwardLayer(2, name='B') >> out_layer
    net = build_net(out_layer)
    net.initialize(Gaussian(0.1))
    e, grad_calc, grad_approx = check_gradient(
        net, n_batches=5, n_timesteps=7, rnd=rnd)
    check_errors.append(e)
    if e > 1e-4:
        # Construct a weight view and break the difference down per
        # parameter.  list(...) keeps this working on Python 3, where
        # dict views are not subscriptable.  Note: the net is forked, so
        # index 1 is only the first of several non-input layers.
        layer = list(net.layers.values())[1]
        diff = layer.create_param_view(Matrix(grad_approx - grad_calc))
        for n, q in diff.items():
            print("====== %s ======" % n)
            print(q)
    print("Checking Gradient of forked architecture = %0.4f" % e)
    self.assertTrue(np.all(np.array(check_errors) < 1e-4))
def test_gradient_finite_differences(self):
    """Gradient check for the fixture's ClockworkLayer network.

    Compares calculated vs. finite-difference gradients parameter block by
    parameter block, accumulating a batch-normalized squared error ``E``
    over every block except 'Timing' (the clock periods are not trained),
    and asserts ``E < 1e-6``.
    """
    e, grad_calc, grad_approx = check_gradient(
        self.net, n_batches=self.batch_size,
        n_timesteps=self.timesteps, rnd=rnd)
    # Construct weight views and break the differences down per parameter.
    # list(...) keeps this working on Python 3, where dict views are not
    # subscriptable.
    layer = list(self.net.layers.values())[1]  # the only layer
    approx = layer.create_param_view(Matrix(grad_approx))
    calc = layer.create_param_view(Matrix(grad_calc))
    diff = layer.create_param_view(Matrix(grad_approx - grad_calc))
    E = 0.0
    for n, q in diff.items():
        if n == 'Timing':
            # Clock timing parameters are not learned; skip their error.
            continue
        print("====== %s ======" % n)
        print("Calculated:")
        print(calc[n])
        print("Approx:")
        print(approx[n])
        print("Difference:")
        print(q)
        err = np.sum(q ** 2) / self.batch_size
        print(err)
        E += err
    print("Checking Gradient of ClockworkLayer with sigmoid = %0.4f" % E)
    self.assertTrue(E < 1e-6)
def test_gradient_finite_differences(self):
    """Gradient check for the fixture's ClockworkLayer network.

    Compares calculated vs. finite-difference gradients parameter block by
    parameter block, accumulating a batch-normalized squared error ``E``
    over every block except 'Timing' (the clock periods are not trained),
    and asserts ``E < 1e-6``.
    """
    e, grad_calc, grad_approx = check_gradient(
        self.net, n_batches=self.batch_size,
        n_timesteps=self.timesteps, rnd=rnd)
    # Construct weight views and break the differences down per parameter.
    # list(...) keeps this working on Python 3, where dict views are not
    # subscriptable.
    layer = list(self.net.layers.values())[1]  # the only layer
    approx = layer.create_param_view(Matrix(grad_approx))
    calc = layer.create_param_view(Matrix(grad_calc))
    diff = layer.create_param_view(Matrix(grad_approx - grad_calc))
    E = 0.0
    for n, q in diff.items():
        if n == 'Timing':
            # Clock timing parameters are not learned; skip their error.
            continue
        print("====== %s ======" % n)
        print("Calculated:")
        print(calc[n])
        print("Approx:")
        print(approx[n])
        print("Difference:")
        print(q)
        err = np.sum(q ** 2) / self.batch_size
        print(err)
        E += err
    print("Checking Gradient of ClockworkLayer with sigmoid = %0.4f" % E)
    self.assertTrue(E < 1e-6)
def test_staticlstm_gradient_finite_differences(self):
    """Gradient check for a static-LSTM network (sigmoid activation).

    Runs ``check_gradient`` with 5 batches of 7 timesteps and asserts the
    finite-difference error stays below 1e-4, dumping the per-parameter
    difference on failure.
    """
    n_timesteps = 7
    n_batches = 5
    check_errors = []
    net = self.build_staticlstm_network(3, 'sigmoid')
    e, grad_calc, grad_approx = check_gradient(
        net, n_batches=n_batches, n_timesteps=n_timesteps, rnd=rnd)
    check_errors.append(e)
    if e > 1e-4:
        # Construct a weight view and break the difference down per
        # parameter.  list(...) keeps this working on Python 3, where
        # dict views are not subscriptable.  The Matrix gets its own name
        # instead of shadowing the batch count as in earlier revisions.
        layer = list(net.layers.values())[1]  # the only layer
        diff_buf = Matrix(grad_approx - grad_calc)
        diff = layer.create_param_view(diff_buf)
        for n, q in diff.items():
            print("====== %s ======" % n)
            print(q)
    self.assertTrue(np.all(np.array(check_errors) < 1e-4))
def test_lwta_gradient_finite_differences(self):
    """Gradient check for LWTA networks over every activation function.

    For each activation in the fixture, builds a size-8 LWTA network, runs
    ``check_gradient`` (5 batches, 7 timesteps) and asserts every error is
    below 1e-4; dumps the per-parameter difference on failure.
    """
    check_errors = []
    for a in self.activation_functions:
        net = self.build_lwta_network(8, a)
        e, grad_calc, grad_approx = check_gradient(
            net, n_batches=5, n_timesteps=7, rnd=rnd)
        check_errors.append(e)
        if e > 1e-4:
            # Construct a weight view and break the difference down per
            # parameter.  list(...) keeps this working on Python 3, where
            # dict views are not subscriptable.
            layer = list(net.layers.values())[1]  # the only layer
            diff = layer.create_param_view(Matrix(grad_approx - grad_calc))
            for n, q in diff.items():
                print("====== %s ======" % n)
                print(q)
        print("Checking Gradient of %s with LWTA = %0.4f" % (a, e))
    self.assertTrue(np.all(np.array(check_errors) < 1e-4))
def test_gradient_finite_differences(self):
    """Gradient check for Lstm97 networks over every fixture config.

    For each configuration in ``self.lstm_configs``, builds a network, runs
    ``check_gradient`` (10 batches, 10 timesteps) and asserts every error
    stays below 1e-4; dumps the per-parameter difference on failure.
    """
    check_errors = []
    for cfg in self.lstm_configs:
        net = self.build_network(cfg)
        e, grad_calc, grad_approx = check_gradient(
            net, n_batches=10, n_timesteps=10, rnd=rnd)
        check_errors.append(e)
        if e > 1e-4:
            # Construct a weight view and break the difference down per
            # parameter.  list(...) keeps this working on Python 3, where
            # dict views are not subscriptable.
            layer = list(net.layers.values())[1]  # the only layer
            diff = layer.create_param_view(Matrix(grad_approx - grad_calc))
            for n, q in diff.items():
                print("====== %s ======" % n)
                print(q)
        print("Checking Gradient of Lstm97 with %s = %0.4f" % (cfg, e))
    self.assertTrue(np.all(np.array(check_errors) < 1e-4))