def _update_table_sample():
    # body elided in the source
    pass


def _update_table_model():
    # body elided in the source
    pass


def _lookup_table():
    # body elided in the source
    pass


if __name__ == "__main__":
    maze = SimpleMaze()
    agent = QAgent(actions=maze.ACTIONS, alpha=0.5, gamma=0.5,
                   explore_strategy='epsilon', epsilon=0.1)

    # logging
    path = deque()  # path in this episode
    episode_reward_rates = []
    num_episodes = 0
    cum_reward = 0
    cum_steps = 0

    # repeatedly run episodes
    while True:
        # initialization
        maze.reset()
        agent.reset(forget_table=False)
        # no observation or reward yet, so the agent acts randomly
        action, _ = agent.observe_and_act(observation=None, last_reward=None)
        path.clear()
        episode_reward = 0
        episode_steps = 0

        # interact and reinforce repeatedly
        while not maze.isfinished():
            new_observation, reward = maze.interact(action)
            action, _ = agent.observe_and_act(observation=new_observation,
                                              last_reward=reward)
            path.append(new_observation)
            episode_reward += reward
            episode_steps += 1
        print len(path),  # trailing comma: keep episode lengths on one line

        cum_steps += episode_steps
        cum_reward += episode_reward
        num_episodes += 1
        # force float division so the rate is not truncated to an int
        episode_reward_rates.append(1.0 * episode_reward / episode_steps)

        if num_episodes % 100 == 0:
            print num_episodes, len(agent.q_table), cum_reward, cum_steps, \
                1.0 * cum_reward / cum_steps  # , path
            cum_reward = 0
            cum_steps = 0

    win = 50
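# The _update_table_sample stub above is where the tabular agent would apply
# its per-transition Q-table update. For reference, a minimal sketch of the
# standard one-step Q-learning rule such a method would implement is given
# below; the function name q_learning_update and its signature are
# illustrative assumptions, not part of the original code.
def q_learning_update(q_table, state, action, reward, new_state, actions,
                      alpha=0.5, gamma=0.5):
    # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
    best_next = max(q_table.get((new_state, a), 0.0) for a in actions)
    old = q_table.get((state, action), 0.0)
    q_table[(state, action)] = old + alpha * (reward + gamma * best_next - old)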
        # draw an independent buffer index for each sample in the batch
        buffer_idx = np.random.randint(0, self.NUM_BUFFERS, (self.BATCH_SIZE,))
        return (self.buffer_old_state[buffer_idx, sample_idx, :],
                self.buffer_action[buffer_idx, sample_idx],
                self.buffer_reward[buffer_idx, sample_idx],
                self.buffer_new_state[buffer_idx, sample_idx, :])

    def isfilled(self):
        # the memory is ready for sampling once every buffer has been filled
        return all(self.filled)

    def reset(self):
        # empty all buffers: rewind write positions and clear filled flags
        self.top = [-1] * self.NUM_BUFFERS
        self.filled = [False] * self.NUM_BUFFERS


if __name__ == '__main__':
    maze = SimpleMaze()
    agent = QAgentNN(dim_state=(1, 1, 2),
                     range_state=((((0, 3), (0, 4)),),),
                     actions=maze.ACTIONS,
                     learning_rate=0.01,
                     reward_scaling=100.0, reward_scaling_update='adaptive',
                     rs_period=2,
                     batch_size=100, update_period=10, freeze_period=2,
                     memory_size=1000,
                     alpha=0.5, gamma=0.5,
                     explore_strategy='epsilon', epsilon=0.02,
                     verbose=2)
    print "Maze and agent initialized!"

    # logging
    path = deque()  # path in this episode
    episode_reward_rates = []
    num_episodes = 0
    cum_reward = 0
    cum_steps = 0
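# The multi-buffer memory sampled above stores transitions column-wise in
# preallocated per-buffer NumPy arrays and indexes them with (buffer, sample)
# pairs. A minimal single-buffer sketch of the same replay-memory idea
# follows; the class name SimpleReplayMemory and its layout are illustrative
# assumptions, not the original implementation.
import numpy as np


class SimpleReplayMemory(object):
    def __init__(self, memory_size, dim_state):
        self.old_state = np.zeros((memory_size, dim_state))
        self.action = np.zeros(memory_size, dtype=int)
        self.reward = np.zeros(memory_size)
        self.new_state = np.zeros((memory_size, dim_state))
        self.top = -1        # index of the most recently written slot
        self.filled = False  # True once the ring buffer has wrapped around
        self.size = memory_size

    def update(self, s, a, r, s_next):
        # write the newest transition into the next ring-buffer slot
        self.top = (self.top + 1) % self.size
        if self.top == self.size - 1:
            self.filled = True
        self.old_state[self.top] = s
        self.action[self.top] = a
        self.reward[self.top] = r
        self.new_state[self.top] = s_next

    def sample(self, batch_size):
        # uniform sampling over the filled portion of the buffer;
        # assumes at least one transition has been stored
        high = self.size if self.filled else self.top + 1
        idx = np.random.randint(0, high, (batch_size,))
        return (self.old_state[idx], self.action[idx],
                self.reward[idx], self.new_state[idx])

# Typical use inside a training loop like the one sketched earlier:
#     memory.update(last_state, action, reward, new_state)
#     if memory.filled:
#         batch = memory.sample(100)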