def testFifo(self):
    """ Testing fifo and _update_state method in
    VectorRegressionWithVariance """
    print("\n * testing fifo and update_state method "
          "in VectorRegressionWithVariance \n")
    in_dim = 3
    order = 3
    len_ts = 10
    model = VectorRegressionWithVariance(in_dim, in_dim, order)
    # use the seeded RandomState so the test data is reproducible
    random = amath.random.RandomState(0)
    in_patterns = random.uniform(size=(len_ts, in_dim))
    fifo_test = amath.zeros((order, in_dim))
    for i in xrange(len_ts):
        self.assertTrue(amath.allclose(model.fifo.to_array(), fifo_test))
        model.learn_one_step(in_patterns[i])
        popped_in_pattern = model._update_state(in_patterns[i])
        if i < order:
            self.assertTrue(
                amath.allclose(popped_in_pattern, amath.zeros(in_dim)))
        else:
            self.assertTrue(
                amath.allclose(popped_in_pattern, in_patterns[i - order]))
        fifo_test[1:] = fifo_test[:-1]
        fifo_test[0] = in_patterns[i]
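
# A minimal reference sketch (not part of the original suite) of the FIFO
# semantics the test above asserts: rows shift toward the tail, the newest
# pattern enters at row 0, and the row that falls off the end is returned.
# `fifo` is assumed to be a plain (order, in_dim) array standing in for the
# model's FIFO object; the helper name is hypothetical.
def _reference_fifo_push(fifo, pattern):
    """Shift `fifo` down one row, insert `pattern` at row 0, and return the
    row that was pushed out."""
    popped = fifo[-1].copy()
    fifo[1:] = fifo[:-1]
    fifo[0] = pattern
    return popped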
def testFifo(self):
    """ Testing fifo and _update_state method in
    MultiTargetVectorRegression """
    print("\n * testing fifo and update_state method "
          "in MultiTargetVectorRegression \n")
    in_dim = 3
    out_dims = [2, 4]
    SGDs = [AdaGrad(), AdaGrad()]
    order = 3
    len_ts = 10
    model = MultiTargetVectorRegression(in_dim, out_dims, SGDs, order)
    # use the seeded RandomState so the test data is reproducible
    random = amath.random.RandomState(0)
    in_patterns = random.uniform(size=(len_ts, in_dim))
    out_pattern_0 = random.uniform(size=(len_ts, out_dims[0]))
    out_pattern_1 = random.uniform(size=(len_ts, out_dims[1]))
    fifo_test = amath.zeros((order, in_dim))
    for i in xrange(len_ts):
        self.assertTrue(
            amath.allclose(model.layers[0].fifo.to_array(), fifo_test))
        model.learn_one_step([out_pattern_0[i], out_pattern_1[i]])
        popped_in_pattern = model._update_state(in_patterns[i])
        if i < order:
            self.assertTrue(
                amath.allclose(popped_in_pattern, amath.zeros(in_dim)))
        else:
            self.assertTrue(
                amath.allclose(popped_in_pattern, in_patterns[i - order]))
        fifo_test[1:] = fifo_test[:-1]
        fifo_test[0] = in_patterns[i]
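
# Usage sketch for the hypothetical helper above: during the first `order`
# pushes the popped row is still the zero initialization, and from step
# `order` onward it is the pattern inserted `order` steps earlier, which is
# exactly the timing both FIFO tests assert against `_update_state`.
def _demo_fifo_timing(order=3, in_dim=3, len_ts=10):
    random = amath.random.RandomState(0)
    patterns = random.uniform(size=(len_ts, in_dim))
    fifo = amath.zeros((order, in_dim))
    for i in range(len_ts):
        popped = _reference_fifo_push(fifo, patterns[i])
        expected = amath.zeros(in_dim) if i < order else patterns[i - order]
        assert amath.allclose(popped, expected)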
def test_update_state(self):
    print("""
---------------------------------
 test updating internal states
---------------------------------
""")
    anc_test = amath.random.uniform(self.low, self.high,
                                    (self.n_anc, self.dim))
    model = FunctionalDyBM(self.dim, anc_test, self.delay,
                           self.decay_rates)
    loc = amath.random.uniform(self.low, self.high,
                               (self.n_obs, self.dim))
    for i in range(10):
        pattern = amath.zeros(self.n_obs)
        pattern[i % self.n_obs] = 1
        fifo_old = model.fifo[:-1]
        fifo_last = model.fifo[-1]
        e_trace_old = model.e_trace
        model._update_state(pattern, loc)

        # check fifo update
        self.assertTrue(amath.allclose(model.fifo[1:], fifo_old))

        # check e_trace update
        e_trace_new = amath.zeros((self.n_elg, self.n_anc)) + fifo_last
        for k in range(e_trace_new.shape[0]):
            e_trace_new[k, :] = e_trace_new[k, :] \
                + self.decay_rates[k] * e_trace_old[k, :]
        self.assertTrue(amath.allclose(model.e_trace, e_trace_new))

        # check mu_anc update
        self.assertTrue(amath.allclose(model.mu, amath.zeros(self.n_anc)))
        print("mu(P) = {}".format(model.mu))
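
# A vectorized sketch of the eligibility-trace recursion that the loop above
# verifies row by row: e_trace[k] <- decay_rates[k] * e_trace[k] + fifo_last,
# broadcast over all decay rates at once. Shapes are assumed from the test:
# `e_trace` is (n_elg, n_anc), `decay_rates` has length n_elg, and
# `fifo_last` broadcasts along the rows. The helper name is hypothetical.
import numpy as np

def _reference_etrace_step(e_trace, decay_rates, fifo_last):
    decay = np.asarray(decay_rates)
    return decay[:, None] * e_trace + fifo_last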
def test_u_tilde(self):
    """ Check _get_u_tilde """
    in_patterns = [amath.array([1, 0, 0, 0]),
                   amath.array([0, 1, 0, 0]),
                   amath.array([0, 0, 1, 0])]
    self.model._update_state(in_patterns[2])
    self.model._update_state(in_patterns[1])
    self.model._update_state(in_patterns[0])
    u_tilde_test = amath.zeros(self.dim_hidden)
    for d in xrange(self.order):
        u_tilde_test += self.model.variables["U"][d, d, :]
    u_tilde_test += self.model.variables["b_h"]
    self.assertTrue(amath.allclose(u_tilde_test,
                                   self.model._get_u_tilde()))
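
# A sketch of the quantity the closed form above exploits. Assuming U has
# shape (order, in_dim, dim_hidden) and the FIFO holds the last `order`
# inputs with the most recent in row 0, a plausible reading of the hidden
# pre-activation is u_tilde = sum_d U[d].T.dot(fifo[d]) + b_h. Because each
# fifo[d] in the test is the d-th standard basis vector, U[d].T.dot(fifo[d])
# collapses to U[d, d, :], which is what the loop above sums.
def _reference_u_tilde(U, b_h, fifo):
    order = U.shape[0]
    return sum(U[d].T.dot(fifo[d]) for d in range(order)) + b_h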
def experiment(period, std, delay, decay, Nh, repeat, bidirectional,
               sigma=0.01):
    """ A single run of the experiment

    Parameters
    ----------
    period : int
        period of the wave
    std : float
        standard deviation of noise
    delay : int
        delay
    decay : list
        list of decay rates
    Nh : int
        number of hidden units
    repeat : int
        number of iterations of training
    bidirectional : boolean
        whether to train bidirectionally
    sigma : float
        std of random initialization; 0.01 is recommended in
        https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
    """
    """ Prepare data generators """
    dim = 1                   # dimension of the wave
    phase = amath.zeros(dim)  # initial phase of the wave
    # forward sequence
    wave = NoisySawtooth(0, period, std, dim, phase, False)
    wave.reset(seed=0)
    # backward sequence
    revwave = NoisySawtooth(0, period, std, dim, phase, True)
    revwave.reset(seed=1)

    """ Prepare a Gaussian Bernoulli DyBM """
    Nv = dim  # number of visible units
    sgd = AdaGrad()
    dybm = GaussianBernoulliDyBM([delay, delay], [decay, decay], [Nv, Nh],
                                 [sgd, deepcopy(sgd)], sigma=sigma,
                                 insert_to_etrace="w_delay")
    dybm.layers[0].layers[1].SGD.set_learning_rate(0)
    dybm.layers[1].layers[1].SGD.set_learning_rate(0)

    """ Learn """
    error = list()  # list of numpy arrays
    bi_end = 0.5
    bi_factor = 2
    for i in range(repeat):
        # update internal states by reading forward sequence
        wave.add_length(period)
        dybm.get_predictions(wave)
        if bidirectional and i % (bi_factor + 1) == 0 and bi_factor > 0 \
           and i < repeat * bi_end:
            # make a time-reversed DyBM
            dybm._time_reversal()
            # update internal states by reading backward sequence
            revwave.add_length(period)
            dybm.get_predictions(revwave)
            # learn backward sequence for one period
            revwave.add_length(period)
            dybm.learn(revwave, get_result=False)
            # make a non time-reversed DyBM
            dybm._time_reversal()
        else:
            # update internal states by reading forward sequence
            wave.add_length(period)
            dybm.get_predictions(wave)
            # learn forward sequence
            wave.add_length(period)
            result = dybm.learn(wave, get_result=True)
            if i % (bi_factor + 1) == bi_factor:
                rmse = RMSE(result["actual"], result["prediction"])
                rmse = amath.to_numpy(rmse)
                error.append(rmse)
    return error, dybm, wave
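
# Example invocation of experiment() with hypothetical parameter values,
# for illustration only: run the same noisy sawtooth with and without
# bidirectional training and compare the final RMSE of each run.
def _demo_experiment():
    uni_error, _, _ = experiment(period=6, std=0.1, delay=3, decay=[0.5],
                                 Nh=2, repeat=300, bidirectional=False)
    bi_error, _, _ = experiment(period=6, std=0.1, delay=3, decay=[0.5],
                                Nh=2, repeat=300, bidirectional=True)
    print("final RMSE (unidirectional): {}".format(uni_error[-1]))
    print("final RMSE (bidirectional): {}".format(bi_error[-1]))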
def test(self):
    env = FourArmedBandit()
    observation = env.reset()
    action_dim = env.action_space.n
    state_dim = env.observation_space.n
    print('\n\nUnit testing DySARSA RL Agent on four armed bandit problem\n')
    print('action dim is :', action_dim)
    print('state_dim is :', state_dim)
    print('Number of bandits: ', env.n_bandits)

    """ DySARSA model parameters """
    delay = 2
    decay = [0.2]
    discount = 0.99
    temperature = 0.1
    SGD = ADAM()
    learning_rate = 0.01
    init_epsilon = 0.3   # initial exploration term (epsilon-greedy)
    final_epsilon = 0.1  # final exploration term (when using annealing)
    steps_per_episode = 10
    train_steps = 1000
    num_bandits = env.n_bandits
    total_reward = amath.zeros(num_bandits)
    amath.random.seed(7)

    """ Create DySARSA function approximator model """
    DySARSA_model = DYSARSA(state_dim, action_dim, delay, decay,
                            discount, SGD, learning_rate, temperature,
                            insert_to_etrace="w_delay", L1=0.00)

    print('\nTraining new agent with DySARSA RL agent and Boltzmann policy')
    agent = DySARSAAgent(env=env, model=DySARSA_model,
                         steps_per_episode=steps_per_episode,
                         train_steps=train_steps,
                         exploration="Boltzmann",
                         init_epsilon=temperature,
                         final_epsilon=final_epsilon,
                         UseQLearning=False, frame_skip=False,
                         suppress_print=True)
    agent.fit(test_every=2, test_num_eps=5, break_reward=100, render=False)

    print('\nTesting DySARSA RL agent with Boltzmann Policy')
    agent = DySARSAAgent(env=env, model=DySARSA_model, suppress_print=True)
    for i in range(5):
        reward = agent.predict(render=True)
        total_reward[agent.action] += reward
    print("\nAverage reward predicted for each arm is: ", total_reward / 5)
    print("The agent thinks bandit " + str(agent.action + 1)
          + " is the most rewarding....")
    if agent.action == num_bandits - 1:
        print("and the prediction is correct!")
    else:
        print("and the prediction is incorrect!")
    print("\n************************************")
    self.assertEqual(agent.action, num_bandits - 1)

    total_reward = amath.zeros(num_bandits)
    print('\nTraining new agent with DySARSA RL agent and '
          'epsilon-greedy policy')
    agent = DySARSAAgent(env=env, model=DySARSA_model,
                         steps_per_episode=steps_per_episode,
                         train_steps=train_steps,
                         exploration="greedy",
                         init_epsilon=init_epsilon,
                         final_epsilon=final_epsilon,
                         UseQLearning=False, frame_skip=False,
                         suppress_print=True)
    agent.fit(test_every=2, test_num_eps=5, break_reward=100, render=False)

    print('\nTesting DySARSA RL agent with epsilon-greedy Policy')
    agent = DySARSAAgent(env=env, model=DySARSA_model, suppress_print=True)
    for i in range(5):
        reward = agent.predict(render=True)
        total_reward[agent.action] += reward
    print("\nAverage reward predicted for each arm is: ", total_reward / 5)
    print("The agent thinks bandit " + str(agent.action + 1)
          + " is the most rewarding....")
    if agent.action == num_bandits - 1:
        print("and the prediction is correct!")
    else:
        print("and the prediction is incorrect!")
    self.assertEqual(agent.action, num_bandits - 1)
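
# A minimal sketch (assumed interface, not the library's implementation) of
# the Boltzmann exploration policy the first agent uses: actions are sampled
# with probability proportional to exp(Q(s, a) / temperature), so a lower
# temperature concentrates the policy on the greedy action. `q_values` is a
# hypothetical 1-D array of action values.
import numpy as np

def _boltzmann_action(q_values, temperature=0.1):
    logits = np.asarray(q_values, dtype=float) / temperature
    logits -= logits.max()  # subtract the max for numerical stability
    probs = np.exp(logits)
    probs /= probs.sum()
    return np.random.choice(len(probs), p=probs)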