示例#1
0
    def _update_table_sample():
        """Empty stub — body missing in this chunk.

        NOTE(review): name suggests a per-sample (model-free) Q-table
        update, but nothing here confirms that; verify against the full
        class before implementing or calling.
        """

    def _update_table_model():
        """Empty stub — body missing in this chunk.

        NOTE(review): name suggests a model-based Q-table update,
        but that is unverified here; confirm intent before use.
        """

    def _lookup_table():
        """Empty stub — body missing in this chunk.

        NOTE(review): presumably a Q-table lookup helper; unverified
        from this view. Takes no parameters as written — TODO confirm.
        """

    
if __name__ == "__main__":
    maze = SimpleMaze()
    agent = QAgent(actions=maze.ACTIONS, alpha=0.5, gamma=0.5, explore_strategy='epsilon', epsilon=0.1)
    # logging
    path = deque()  # path in this episode
    episode_reward_rates = []
    num_episodes = 0
    cum_reward = 0
    cum_steps = 0

    # repeatedly run episodes
    while True:
        # initialization
        maze.reset()
        agent.reset(foget_table=False)
        action, _ = agent.observe_and_act(observation=None, last_reward=None)  # get and random action
        path.clear()
        episode_reward = 0
        episode_steps = 0

        # interact and reinforce repeatedly
        while not maze.isfinished():
            new_observation, reward = maze.interact(action)
            action, _ = agent.observe_and_act(observation=new_observation, last_reward=reward)
            path.append(new_observation)
            episode_reward += reward
            episode_steps += 1
        print len(path),

        cum_steps += episode_steps
        cum_reward += episode_reward
        num_episodes += 1
        episode_reward_rates.append(episode_reward / episode_steps)
        if num_episodes % 100 == 0:
            print num_episodes, len(agent.q_table), cum_reward, cum_steps, 1.0 * cum_reward / cum_steps#, path
            cum_reward = 0
            cum_steps = 0
    win = 50
示例#2
0
            buffer_idx = np.random.randint(0, self.NUM_BUFFERS, (self.BATCH_SIZE,))
            return (self.buffer_old_state[buffer_idx, sample_idx, :],
                    self.buffer_action[buffer_idx, sample_idx],
                    self.buffer_reward[buffer_idx, sample_idx],
                    self.buffer_new_state[buffer_idx, sample_idx, :])

        def isfilled(self):
            """Return True once every per-buffer 'filled' flag is set."""
            for flag in self.filled:
                if not flag:
                    return False
            return True

        def reset(self):
            """Reset bookkeeping for all NUM_BUFFERS buffers: top indices
            back to -1 (empty) and every 'filled' flag cleared."""
            self.top = [-1 for _ in range(self.NUM_BUFFERS)]
            self.filled = [False for _ in range(self.NUM_BUFFERS)]


if __name__ == '__main__':
    maze = SimpleMaze()
    agent = QAgentNN(dim_state=(1, 1, 2), range_state=((((0, 3),(0, 4)),),), actions=maze.ACTIONS,
                     learning_rate=0.01,
                     reward_scaling=100.0, reward_scaling_update='adaptive', rs_period=2,
                     batch_size=100, update_period=10,
                     freeze_period=2, memory_size=1000,
                     alpha=0.5, gamma=0.5, explore_strategy='epsilon', epsilon=0.02, verbose=2)
    print "Maze and agent initialized!"

    # logging
    path = deque()  # path in this episode
    episode_reward_rates = []
    num_episodes = 0
    cum_reward = 0
    cum_steps = 0