def setUpClass(cls):
    """Initializes a simple instance of network for testing.

    Builds an 8-unit RNN with all-ones input/output weights and biases,
    identity recurrence, tanh activation and softmax-cross-entropy output.
    """
    n_in, n_hidden, n_out = 2, 8, 2
    cls.rnn = RNN(np.ones((n_hidden, n_in)),   # W_in
                  np.eye(n_hidden),            # W_rec
                  np.ones((n_out, n_hidden)),  # W_out
                  np.ones(n_hidden),           # b_rec
                  np.ones(n_out),              # b_out
                  activation=tanh,
                  alpha=0.6,
                  output=softmax,
                  loss=softmax_cross_entropy)
def setUpClass(cls):
    """Builds a tiny 2-unit identity-weight RNN fixture.

    Weights are stored on the class for reuse by individual tests, and the
    network's activity and error are pinned to fixed values.
    """
    cls.W_in = np.eye(2)
    cls.W_rec = np.eye(2)
    cls.W_out = np.eye(2)
    # Off-diagonal entries are -1, diagonal entries are 0.
    cls.W_FB = np.eye(2) - np.ones((2, 2))
    cls.b_rec = np.zeros(2)
    cls.b_out = np.zeros(2)
    cls.rnn = RNN(cls.W_in, cls.W_rec, cls.W_out,
                  cls.b_rec, cls.b_out,
                  activation=identity,
                  alpha=1,
                  output=softmax,
                  loss=softmax_cross_entropy)
    # Pin network state so tests are deterministic.
    cls.rnn.a = np.ones(2)
    cls.rnn.error = 0.5 * np.ones(2)
def test_mimic_task(self):
    """Verifies that the proper RNN output is returned as label in a
    simple case where the RNN simply counts the number of time steps."""
    from core import RNN
    from functions import identity, mean_squared_error
    # Identity-weight, zero-bias linear network: with constant input it
    # accumulates, so its output at step t is t (per output unit).
    rnn_target = RNN(np.eye(2),    # W_in
                     np.eye(2),    # W_rec
                     np.eye(2),    # W_out
                     np.zeros(2),  # b_rec
                     np.zeros(2),  # b_out
                     activation=identity,
                     alpha=1,
                     output=identity,
                     loss=mean_squared_error)
    task = Mimic_RNN(rnn_target, p_input=1, tau_task=1)
    data = task.gen_data(100, 0)
    steps = np.arange(1, 101)
    # Expected labels: each of the 2 output units counts time steps.
    y_correct = np.stack([steps, steps], axis=-1)
    self.assertTrue(np.isclose(data['train']['Y'], y_correct).all())
def test_kernl_reduce_rflo(self):
    """Verifies that KeRNL reduces to RFLO in special case.

    If beta is initialized to the identity while the gammas are all
    initialized to the network inverse time constant alpha, and the KeRNL
    optimizer has 0 learning rate (i.e. beta and gamma do not change),
    then KeRNL should produce the same gradients as RFLO if the
    approximate KeRNL of (1 - alpha) (rather than exp(-alpha)) is used."""
    self.task = Add_Task(4, 6, deterministic=True, tau_task=2)
    self.data = self.task.gen_data(100, 0)
    alpha = 0.3
    #RFLO
    # Seed before construction/training so both networks see the same
    # stream of random numbers.
    np.random.seed(1)
    self.rnn_1 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out,
                     activation=tanh,
                     alpha=alpha,
                     output=softmax,
                     loss=softmax_cross_entropy)
    self.optimizer_1 = Stochastic_Gradient_Descent(lr=0.001)
    self.learn_alg_1 = RFLO(self.rnn_1, alpha)
    #KeRNL with beta and gamma fixed to RFLO values
    np.random.seed(1)
    self.rnn_2 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out,
                     activation=tanh,
                     alpha=alpha,
                     output=softmax,
                     loss=softmax_cross_entropy)
    self.optimizer_2 = Stochastic_Gradient_Descent(lr=0.001)
    # lr=0 freezes KeRNL's beta/gamma at their initial (RFLO-equivalent)
    # values.
    self.KeRNL_optimizer = Stochastic_Gradient_Descent(lr=0)
    A = np.eye(self.rnn_2.n_h)
    alpha_i = np.ones(self.rnn_2.n_h) * alpha
    self.learn_alg_2 = KeRNL(self.rnn_2, self.KeRNL_optimizer,
                             A=A, alpha=alpha_i)
    monitors = []
    # Identical seeds before each run so the two simulations are driven
    # by the same noise.
    np.random.seed(2)
    self.sim_1 = Simulation(self.rnn_1)
    self.sim_1.run(self.data,
                   learn_alg=self.learn_alg_1,
                   optimizer=self.optimizer_1,
                   monitors=monitors,
                   verbose=False)
    np.random.seed(2)
    self.sim_2 = Simulation(self.rnn_2)
    self.sim_2.run(self.data,
                   learn_alg=self.learn_alg_2,
                   optimizer=self.optimizer_2,
                   monitors=monitors,
                   verbose=False)
    #Assert networks learned the same weights
    assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec)
    #Assert networks' parameters changed appreciably, despite a large
    #tolerance for closeness.
    self.assertFalse(np.isclose(self.W_rec, self.rnn_2.W_rec).all())
def test_small_lr_case(self):
    """Checks that RTRL, Future-BPTT and Efficient-BPTT learn nearly
    identical recurrent weights when the learning rate is very small,
    while still moving appreciably away from the initialization."""
    alpha = 1
    shared = dict(activation=tanh, alpha=alpha,
                  output=softmax, loss=softmax_cross_entropy)
    self.rnn_1 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out, **shared)
    self.rnn_2 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out, **shared)
    self.rnn_3 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out, **shared)
    lr = 0.00001
    self.optimizer_1 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_1 = RTRL(self.rnn_1)
    self.optimizer_2 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_2 = Future_BPTT(self.rnn_2, 25)
    self.optimizer_3 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_3 = Efficient_BPTT(self.rnn_3, 100)
    monitors = []
    configs = [(self.rnn_1, self.learn_alg_1, self.optimizer_1),
               (self.rnn_2, self.learn_alg_2, self.optimizer_2),
               (self.rnn_3, self.learn_alg_3, self.optimizer_3)]
    # Re-seed before each run so every algorithm sees the same noise.
    for i, (rnn, learn_alg, optimizer) in enumerate(configs, start=1):
        np.random.seed(1)
        sim = Simulation(rnn)
        sim.run(self.data,
                learn_alg=learn_alg,
                optimizer=optimizer,
                monitors=monitors,
                verbose=False)
        setattr(self, 'sim_{}'.format(i), sim)
    #Assert networks learned similar weights with a small tolerance.
    assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec, atol=1e-4)
    assert_allclose(self.rnn_2.W_rec, self.rnn_3.W_rec, atol=1e-4)
    #But that there was some difference from initialization
    self.assertFalse(
        np.isclose(self.rnn_1.W_rec, self.W_rec, atol=1e-4).all())