# NOTE(review): this chunk arrived with every newline stripped, so the whole
# section is collapsed onto one physical line and is not valid Python as-is.
# The three leading `def` stubs (_update_table_sample, _update_table_model,
# _lookup_table) lost their bodies in extraction — they cannot be documented
# or restored from this view; recover them from the original file.
#
# What follows the stubs is a `__main__` driver for tabular Q-learning:
# it builds a SimpleMaze and a QAgent (epsilon-greedy, alpha=gamma=0.5,
# epsilon=0.1), then loops forever over episodes. Each episode resets the
# maze and agent (foget_table=False — sic, the keyword typo presumably
# matches QAgent's API; verify against the class definition), steps the
# agent with observe_and_act() until maze.isfinished(), accumulates
# per-episode reward/step counts into a deque `path` and running totals,
# and every 100 episodes prints cumulative stats (Python 2 `print`
# statements) before zeroing the accumulators. The trailing `win = 50`
# looks like the start of a moving-window computation truncated by the
# chunk boundary — TODO confirm against the full file.
def _update_table_sample(): def _update_table_model(): def _lookup_table(): if __name__ == "__main__": maze = SimpleMaze() agent = QAgent(actions=maze.ACTIONS, alpha=0.5, gamma=0.5, explore_strategy='epsilon', epsilon=0.1) # logging path = deque() # path in this episode episode_reward_rates = [] num_episodes = 0 cum_reward = 0 cum_steps = 0 # repeatedly run episodes while True: # initialization maze.reset() agent.reset(foget_table=False) action, _ = agent.observe_and_act(observation=None, last_reward=None) # get and random action path.clear() episode_reward = 0 episode_steps = 0 # interact and reinforce repeatedly while not maze.isfinished(): new_observation, reward = maze.interact(action) action, _ = agent.observe_and_act(observation=new_observation, last_reward=reward) path.append(new_observation) episode_reward += reward episode_steps += 1 print len(path), cum_steps += episode_steps cum_reward += episode_reward num_episodes += 1 episode_reward_rates.append(episode_reward / episode_steps) if num_episodes % 100 == 0: print num_episodes, len(agent.q_table), cum_reward, cum_steps, 1.0 * cum_reward / cum_steps#, path cum_reward = 0 cum_steps = 0 win = 50
# NOTE(review): fragment of a second, parallel `__main__` driver, also with
# all newlines stripped. It starts MID-CALL — `reward_scaling=100.0, ...`
# are trailing keyword arguments to an agent constructor whose name lies
# before this chunk (the kwargs — reward_scaling, batch_size, update_period,
# freeze_period, memory_size — suggest a neural-network / experience-replay
# Q agent, but that cannot be confirmed from here). It also ends MID-LOOP,
# on commented-out debug prints inside the `while not maze.isfinished()`
# body, so the episode bookkeeping that presumably follows is missing.
#
# The visible structure mirrors the tabular driver on the previous line:
# print a startup banner (Python 2 `print` statement), init logging vars
# (deque `path`, reward-rate list, cumulative counters), then loop forever:
# reset maze and agent, seed the first action via
# observe_and_act(observation=None, last_reward=None), and step the
# agent against maze.interact(action) — here also capturing a per-step
# `loss` alongside the chosen action, and tracking `episode_loss`
# (initialized but not yet used within this view).
# Do not edit in place; restore the full script from the original file.
reward_scaling=100.0, reward_scaling_update='adaptive', rs_period=2, batch_size=100, update_period=10, freeze_period=2, memory_size=1000, alpha=0.5, gamma=0.5, explore_strategy='epsilon', epsilon=0.02, verbose=2) print "Maze and agent initialized!" # logging path = deque() # path in this episode episode_reward_rates = [] num_episodes = 0 cum_reward = 0 cum_steps = 0 # repeatedly run episodes while True: maze.reset() agent.reset() action, _ = agent.observe_and_act(observation=None, last_reward=None) # get and random action path.clear() episode_reward = 0 episode_steps = 0 episode_loss = 0 # print '(', # interact and reinforce repeatedly while not maze.isfinished(): new_observation, reward = maze.interact(action) action, loss = agent.observe_and_act(observation=new_observation, last_reward=reward) # print new_observation, # print action, # print agent.fun_rs_lookup(),