def see_path():
    '''
    Show the path learned by the agent, using the stored q_table.
    :return: None
    '''
    q_table = read_table()
    env = Maze()
    agent = DQN_Agent(actions=list(range(env.n_actions)))
    agent.load_q_table(q_table)
    s = env.reset()
    while True:
        env.render()
        a = agent.get_path(s)
        # print(a)
        time.sleep(0.2)
        s_, r, done = env.step(a)
        s = s_
        if done:
            env.render()
            break
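# Note: read_table() and store_table() are used by see_path() and common_check()
# but are not defined in this excerpt. The sketch below is one plausible,
# pickle-based implementation; the file name 'q_table.pkl' is an assumption,
# not the original code.
import pickle

Q_TABLE_PATH = 'q_table.pkl'   # assumed file name for the stored Q-table

def store_table(q_table):
    # persist the learned Q-table to disk
    with open(Q_TABLE_PATH, 'wb') as f:
        pickle.dump(q_table, f)

def read_table():
    # load a previously stored Q-table from disk
    with open(Q_TABLE_PATH, 'rb') as f:
        return pickle.load(f)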
def common_check(episodes=400):
    '''
    Run an ordinary learning process and store the resulting q_table.
    :return: None
    '''
    env = Maze()
    agent = DQN_Agent(actions=list(range(env.n_actions)))
    for episode in range(episodes):
        s = env.reset()
        episode_reward = 0
        while True:
            #env.render()                 # You can comment out all render() calls to turn off the graphical interface and speed up training.
            a = agent.choose_action(s)
            s_, r, done = env.step(a)
            q_table = agent.update_q(s, s_, a, r)
            episode_reward += r
            s = s_
            if done:
                #env.render()
                break
        print('episode:', episode, 'episode_reward:', episode_reward)
    store_table(q_table)
def check_converge_time():
    '''
    Show how many episodes are needed, on average over 100 runs,
    to find the optimal path for the first time.
    :return: None
    '''
    env = Maze()
    cvg_time = 0
    for i in range(100):
        print(i)
        agent = DQN_Agent(actions=list(range(env.n_actions)))
        flag = 0
        for episode in range(300):
            if flag:
                break
            s = env.reset()
            episode_reward = 0
            while True:
                #env.render()                 # You can comment out all render() calls to turn off the graphical interface and speed up training.
                a = agent.choose_action(s)
                s_, r, done = env.step(a)
                q_table = agent.update_q(s, s_, a, r)
                episode_reward += r
                if episode_reward == 4:   # a reward of 4 corresponds to finding the optimal path in this maze
                    cvg_time += episode
                    flag = 1
                s = s_
                #print(s)
                if done:
                    #env.render()
                    #time.sleep(0.5)
                    break
            # print('episode:', episode, 'episode_reward:', episode_reward)
        if flag == 0:
            cvg_time += 300
    print(cvg_time / 100)   # average number of episodes before the optimal path is first found
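# One plausible way to chain these functions (a sketch, not part of the original
# file): train and persist the Q-table, then replay the learned path.
if __name__ == '__main__':
    common_check(episodes=400)   # train the agent and store the Q-table
    see_path()                   # reload the table and render the learned path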
Example #4
# import the maze environment matching the selected maze variant
elif maze == '2':
    from maze_env2 import Maze

if __name__ == "__main__":
    ### START CODE HERE ###
    # This is an agent with a random policy. You can learn how to interact with the environment from the code below.
    # Then you can delete it and write your own code.

    episodes = 100
    model_based_episodes = 5
    env = Maze()
    model = Model(actions=list(range(env.n_actions)))
    agent = Agent(actions=list(range(env.n_actions)))  # choose from range(4), i.e. the four actions 0, 1, 2, 3 (up, down, right, left)
    for episode in range(episodes):  # for each episode, from start to finish
        s = env.reset()
        episode_reward = 0
        while True:
            #env.render()                 # You can comment out all render() calls to turn off the graphical interface and speed up training.

            # move one step
            a = agent.choose_action(str(s))

            s_, r, done = env.step(a)

            # update Q (model-free)
            agent.learn(str(s), a, r, str(s_), done)

            model.store_transition(str(s), a, r, s_)

            # update Q (model-based)