def test_get_item_not_full(self):
    """Indexing returns items in insertion order when nothing has wrapped."""
    buf = RingBuffer(3)
    for value in ("test1", "test2", "test3"):
        buf.append(value)
    for index, expected in enumerate(("test1", "test2", "test3")):
        self.assertEqual(buf[index], expected)
def test_threading(self):
    """Two threads iterating the same buffer each observe the full contents."""
    buf = RingBuffer(3)
    for value in ("test1", "test2", "test3"):
        buf.append(value)
    workers = [self.__class__.TestIterThreading(buf) for _ in range(2)]
    # Start both threads before joining so the iterations overlap.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(3)
    expected = ["test1", "test2", "test3"]
    for worker in workers:
        self.assertListEqual(worker.actual, expected)
def test_len(self):
    """len() grows with each append and is capped at the buffer capacity."""
    buf = RingBuffer(3)
    steps = [("test1", 1), ("test2", 2), ("test3", 3), ("test4", 3)]
    for value, expected_len in steps:
        buf.append(value)
        self.assertEqual(len(buf), expected_len)
def test_iter(self):
    """Iteration yields only the newest items, and a second pass repeats them."""
    buf = RingBuffer(3)
    for value in ("test1", "test2", "test3", "test4", "test5"):
        buf.append(value)
    expected = ["test3", "test4", "test5"]
    self.assertListEqual(list(buf), expected)
    # Iterating again must produce the same items (iteration is restartable).
    self.assertListEqual([item for item in buf], expected)
class DQNAgent:
    """Deep Q-Network agent with an experience-replay memory.

    Holds an online/target model pair (``DQNModel``), a bounded replay
    memory (``RingBuffer``), and an epsilon-greedy exploration schedule.
    Hyperparameters are read from ``config.config_section_map()``.
    """

    def __init__(self, env, action_size, config):
        cfg = config.config_section_map()
        # Replay memory D: bounded ring buffer of transitions.
        self.memory = RingBuffer(int(cfg['memorysize']))
        self.gamma = float(cfg['gamma'])  # discount rate
        self.epsilon = float(cfg['epsilon'])  # exploration rate
        self.epsilon_min = float(cfg['epsilonmin'])
        self.epsilon_decay = float(cfg['epsilondecay'])
        self.learning_rate = float(cfg['learningrate'])
        self.action_size = action_size
        self.env = env
        self.dqn_model = DQNModel(self.learning_rate, action_size)

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in replay memory.

        Frames are compressed to uint8 to save memory; the reward is
        clipped to {-1, 0, 1} via np.sign (DQN-style reward clipping).
        """
        state = state.astype('uint8')
        next_state = next_state.astype('uint8')
        reward = np.sign(reward)
        self.memory.append((state, action, reward, next_state, done))

    def action(self, fi_t, env_sample, csv_handler):
        """Epsilon-greedy action selection.

        With probability epsilon, return the caller-provided random action
        ``env_sample``; otherwise return argmax over the online model's
        predicted Q-values for state ``fi_t``.
        """
        if random.uniform(0, 1) <= self.epsilon:
            # Explore: use the pre-sampled random action.
            return env_sample
        batched_state = np.expand_dims(fi_t, axis=0)
        # The all-ones action mask asks the model for every action's Q-value.
        q_values = self.dqn_model.model.predict(
            [batched_state, np.ones([1, self.action_size])])
        csv_handler.write_q_values(q_values)
        return np.argmax(q_values[0])

    def replay(self, batch_size, csv_logger):
        """Sample a minibatch and fit the online network toward target Q-values.

        Targets are ``r + gamma * max_a' Q_target(s', a')``, with the future
        term zeroed for terminal transitions. The one-hot action mask makes
        the fit update only the Q-value of the taken action.
        """
        # assumes frames are 84x84 with a 4-frame stack — TODO confirm against
        # the preprocessing pipeline.
        states = np.zeros((batch_size, 4, 84, 84), dtype='float32')
        actions = np.zeros((batch_size, 4), dtype='uint8')
        rewards = np.zeros(batch_size, dtype='float32')
        next_states = np.zeros((batch_size, 4, 84, 84), dtype='float32')
        dones = np.ones((batch_size, 4), dtype=bool)
        mini_batch = self.get_minibatch(batch_size)  # sample random mini_batch from D
        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            states[i] = state.astype('float32')
            actions[i][action] = 1  # one-hot mask of the taken action
            rewards[i] = reward
            next_states[i] = next_state.astype('float32')
            dones[i] = done  # broadcast across all action slots
        next_state_q_values = self.dqn_model.target_model.predict(
            [next_states, np.ones(actions.shape)])
        next_state_q_values[dones] = 0  # terminal states have no future value
        q_values = rewards + self.gamma * np.max(next_state_q_values, axis=1)
        # Trains the model for a fixed number of epochs (iterations on a dataset)
        self.dqn_model.model.fit([states, actions], actions * q_values[:, None],
                                 batch_size=batch_size, verbose=0,
                                 callbacks=[csv_logger])

    def get_minibatch(self, batch_size):
        """Sample ``batch_size`` transitions uniformly, with replacement."""
        # Use len()/indexing instead of calling __len__/__getitem__ directly.
        return [self.memory[random.randrange(len(self.memory))]
                for _ in range(batch_size)]

    def load(self, name):
        """Load online-network weights from ``name`` and sync the target network."""
        self.dqn_model.model.load_weights(name)
        self.dqn_model.update_target_model()

    def save(self, name):
        """Persist the online-network weights to ``name``."""
        self.dqn_model.model.save_weights(name)

    def decrease_epsilone(self):
        """Anneal epsilon linearly, clamped so it never drops below epsilon_min."""
        # NOTE(review): method-name typo ("epsilone") kept for caller compatibility.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon - self.epsilon_decay)