Example #1

import matplotlib.pyplot as plt
import numpy as np

def main(argv=()):
    print("hello world")
    #game = make_game(int(argv[1]) if len(argv) > 1 else 0)
    #humanplayer_scrolly(game)
    env = Maze()
    agent = GMRAgent(actions=list(range(env.n_actions)))
    n_trj = 1000
    reward_list = []
    step_r = []
    for eps in range(n_trj):
        observation = env.reset()
        step = 0
        re_vec = []
        r_episode = 0
        while step < 200:  # cap each episode at 200 steps
            step += 1
            #env.render()
            #action = agent.random_action(str(observation))
            state = agent.obs2state(observation)  # raw observation -> discrete state id
            action = agent.ActAccordingToGM(state)  # act from the agent's graph memory
            observation_, reward, done, info = env.step(action)
            state_ = agent.obs2state(observation_)
            re_vec.append([state, action, reward, state_])
            #agent.MemoryWriter(re_vec)
            agent.PairWriter(state, action, reward, state_)  # store one transition
            r_episode += reward
            step_r.append(reward)
            observation = observation_
            if done:
                print("done!")
                break
        #print("re_vec",re_vec)
        #agent.MemoryWriter(re_vec)
        reward_list.append(r_episode)
        if len(step_r) > 12000:  # stop once ~12k environment steps are collected
            break

    # agent.plotGmemory()
    # print('state',state)
    # agent.MemoryReader(state)
    # Plot and persist per-episode returns.
    plt.figure(1)
    plt.plot(reward_list)
    #plt.show()
    plt.savefig("./RESULT/GQ/reward_list005.png")
    reward_list = np.array(reward_list)
    np.save('./RESULT/GQ/reward_list005.npy', reward_list)

    # Trailing moving average of per-step reward (window 290, zeroed warm-up).
    temp_step_r = []
    for i in range(len(step_r)):
        if i < 300:
            temp_step_r.append(0)
        else:
            temp_step_r.append(sum(step_r[(i - 290):i]) / 290)
    #print("temp_step_r",temp_step_r)
    plt.figure(2)
    plt.plot(temp_step_r)
    #plt.show()
    plt.savefig("./RESULT/GQ/step_r005.png")
    temp_step_r = np.array(temp_step_r)
    np.save('./RESULT/GQ/temp_step_r005.npy', temp_step_r)
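
The smoothing loop above re-sums a 290-step window at every index, which is O(n*w). Below is a minimal vectorized sketch of the same trailing-average idea, assuming numpy; the helper name trailing_mean is ours, and it folds the hard-coded warm-up of 300 and window of 290 into a single `window` parameter, zeroing the warm-up region as this example does:

import numpy as np

def trailing_mean(values, window):
    """Entry i is the mean of values[i-window:i]; warm-up entries are zero."""
    values = np.asarray(values, dtype=float)
    out = np.zeros_like(values)
    if len(values) > window:
        kernel = np.ones(window) / window
        # valid-mode convolution: element j averages values[j:j+window]
        out[window:] = np.convolve(values[:-1], kernel, mode="valid")
    return out

Usage here would be temp_step_r = trailing_mean(step_r, 290).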
Example #2
def main(argv=()):
    print("hello world")
    #game = make_game(int(argv[1]) if len(argv) > 1 else 0)
    #humanplayer_scrolly(game)
    env = Maze()
    agent = GMRAgent(actions=list(range(env.n_actions)))
    n_trj = 1000
    for eps in range(n_trj):
        observation = env.reset()
        step = 0
        re_vec = []
        while step < 1000:
            step += 1
            env.render()
            #action = agent.random_action(str(observation))
            state = agent.obs2state(observation)
            action = agent.ActAccordingToGM(state)
            observation_, reward, done = env.step(action)  # this Maze variant returns a 3-tuple (no info dict)
            state_ = agent.obs2state(observation_)
            re_vec.append([state, action, reward, state_])
            observation = observation_
            if done:
                print("done!")
                break
        #print("re_vec",re_vec)
        agent.MemoryWriter(re_vec)  # write the whole episode trajectory at once

    agent.plotGmemory()
    print('state', state)
    agent.MemoryReader(state)
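
GMRAgent itself is not part of these listings. Below is a minimal stub of the interface the examples exercise; the method names come from the calls above, but the bodies are illustrative placeholders (uniform-random policy, list-backed memory), not the real graph-memory algorithm:

import random

class GMRAgentStub:
    """Illustrative stand-in for GMRAgent with the same call surface."""

    def __init__(self, actions):
        self.actions = actions
        self.memory = []  # (state, action, reward, next_state) tuples

    def obs2state(self, observation):
        # The real agent maps a raw observation to a discrete state id;
        # stringifying is only a placeholder.
        return str(observation)

    def ActAccordingToGM(self, state):
        # The real agent consults its graph memory; the stub acts randomly.
        return random.choice(self.actions)

    def PairWriter(self, state, action, reward, state_):
        self.memory.append((state, action, reward, state_))

    def MemoryWriter(self, re_vec):
        self.memory.extend(tuple(t) for t in re_vec)

    def MemoryReader(self, state):
        return [t for t in self.memory if t[0] == state]

    def MemoryReconstruction(self, t1, t2):
        pass  # thresholds t1, t2; consolidation semantics are not shown in the source

    def plotGmemory(self):
        pass  # the real agent draws its memory graph with matplotlib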
Example #3

import matplotlib.pyplot as plt

# SAVEPATH is assumed to be an output-directory constant defined at module level.
def main(argv=()):
    print("hello world")
    #game = make_game(int(argv[1]) if len(argv) > 1 else 0)
    #humanplayer_scrolly(game)
    env = Maze()
    agent = GMRAgent(actions=list(range(env.n_actions)))
    #n_trj = 100
    n_trj = 3
    reward_list = []
    step_r = []
    for eps in range(n_trj):
        observation = env.reset()
        step = 0
        re_vec = []
        r_episode = 0
        while step < 200:
            step += 1
            env.render()
            #action = agent.random_action(str(observation))
            state = agent.obs2state(observation)
            action = agent.ActAccordingToGM(state)
            observation_, reward, done = env.step(action)
            state_ = agent.obs2state(observation_)
            re_vec.append([state, action, reward, state_])
            #agent.MemoryWriter(re_vec)
            agent.PairWriter(state, action, reward, state_)
            r_episode += reward
            step_r.append(reward)
            observation = observation_
            if done:
                print("done!")
                break
        #print("re_vec",re_vec)
        #agent.MemoryWriter(re_vec)
        # Snapshot the graph memory before and after reconstruction.
        agent.plotGmemory()
        plt.savefig(SAVEPATH + str(eps) + "original.png")
        reward_list.append(r_episode)
        t1 = 120
        t2 = 100
        agent.MemoryReconstruction(t1, t2)  # consolidate memory with thresholds t1, t2
        agent.plotGmemory()
        plt.savefig(SAVEPATH + str(eps) + "reconstruct.png")
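
Example #3 saves two figures per episode without closing them, so matplotlib keeps every figure alive for the whole run; the commented-out blocks in Example #4 hint at the figure/close discipline. A minimal sketch of that pattern, assuming plotGmemory draws into the current figure:

import matplotlib.pyplot as plt

def save_memory_plot(agent, path, dpi=300):
    """Draw the agent's memory into a fresh figure, save it, and close it."""
    fig = plt.figure()
    agent.plotGmemory()  # assumption: draws into the current figure
    plt.savefig(path, dpi=dpi)
    plt.close(fig)  # release the figure so long runs do not accumulate them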
Example #4
                break

        reward_list.append(r_episode)
    plt.figure(1)
    plt.plot(reward_list)
    #plt.show()
    plt.savefig("./RESULT/Q/reward_list003.png")
    reward_list = np.array(reward_list)
    np.save('./RESULT/Q/reward_list003.npy', reward_list)

    # Trailing moving average of per-step reward (window 490; raw values during warm-up).
    temp_step_r = []
    for i in range(len(step_r)):
        if i < 500:
            temp_step_r.append(step_r[i])
        else:
            temp_step_r.append(sum(step_r[(i - 490):i]) / 490)
    #print("temp_step_r",temp_step_r)
    plt.figure(2)
    plt.plot(temp_step_r)
    #plt.show()
    plt.savefig("./RESULT/Q/step_r003.png")
    temp_step_r = np.array(temp_step_r)
    np.save('./RESULT/Q/temp_step_r003.npy', temp_step_r)


if __name__ == "__main__":
    # env = gym.make('MontezumaRevengeNoFrameskip-v4')
    # main_MR()
    env = Maze()
    S_space = env.state_space()
    main_MAZE(env)
def main(argv=()):
    print("hello world")
    #game = make_game(int(argv[1]) if len(argv) > 1 else 0)
    #humanplayer_scrolly(game)
    env = Maze()
    agent = GMRAgent(actions=list(range(env.n_actions)))
    #n_trj = 100
    n_trj = 2000
    reward_list = []
    step_r = []
    for eps in range(n_trj):
        observation = env.reset()
        step = 0
        re_vec = []
        r_episode = 0
        while step < 200:
            step += 1
            #env.render()
            #action = agent.random_action(str(observation))
            state = agent.obs2state(observation)
            action = agent.ActAccordingToGM(state)
            observation_, reward, done, info = env.step(action)
            state_ = agent.obs2state(observation_)
            re_vec.append([state, action, reward, state_])
            #agent.MemoryWriter(re_vec)
            agent.PairWriter(state, action, reward, state_)
            r_episode += reward
            step_r.append(reward)
            observation = observation_
            if done:
                print("done!")
                break
        #print("re_vec",re_vec)
        #agent.MemoryWriter(re_vec)
        # agent.plotGmemory()
        # plt.savefig(SAVEPATH+str(eps)+"original_substract.png")
        reward_list.append(r_episode)
        t1 = 120
        t2 = 100
        agent.MemoryReconstruction(t1, t2)
        # plt.figure(0)
        # agent.plotGmemory()
        # plt.savefig(SAVEPATH+str(eps)+"reconstruct_substract.png", dpi=1080)
        # plt.close(0)
        # plt.figure(3)
        # agent.plotAbastract()
        # plt.savefig(SAVEPATH+str(eps)+"abstract.png", dpi=1080)
        # plt.close(3)
        # plt.figure(4)
        # agent.plotReconPath()
        # plt.savefig(SAVEPATH+str(eps)+"reconpath.png", dpi=1080)
        # plt.close(4)
        if len(step_r) > 12000:  # stop once ~12k environment steps are collected
            break
    # agent.plotGmemory()


    plt.figure(1)
    plt.plot(reward_list)
    #plt.show()
    plt.savefig(SAVEPATH + "reward_list005.png")
    reward_list = np.array(reward_list)
    np.save(SAVEPATH + 'reward_list005.npy', reward_list)

    # Trailing moving average of per-step reward (window 190).
    temp_step_r = []
    for i in range(len(step_r)):
        if i < 200:
            if step_r[i] > 100:
                # Clip outliers purely so the plotted curve stays on the same
                # scale as the other algorithms for comparison; no other meaning.
                temp_step_r.append(0)
            else:
                temp_step_r.append(step_r[i])
        else:
            temp_step_r.append(sum(step_r[(i - 190):i]) / 190)
    #print("temp_step_r",temp_step_r)
    plt.figure(2)
    plt.plot(temp_step_r)
    #plt.show()
    plt.savefig(SAVEPATH + "step_r005.png")
    temp_step_r = np.array(temp_step_r)
    np.save(SAVEPATH + 'temp_step_r005.npy', temp_step_r)
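
The Maze environment is also external, and the snippets disagree on its step signature: Examples #2 and #3 unpack a 3-tuple, while Examples #1 and #4 unpack a gym-style 4-tuple. Below is a minimal grid-world stub covering the attributes the examples touch (n_actions, state_space, reset, step, render), using the 4-tuple convention; the layout and reward values are invented for illustration:

class MazeStub:
    """Illustrative stand-in for the custom Maze env used above."""

    def __init__(self, size=5):
        self.size = size
        self.n_actions = 4  # up, down, left, right
        self.pos = (0, 0)
        self.goal = (size - 1, size - 1)

    def state_space(self):
        return [(x, y) for x in range(self.size) for y in range(self.size)]

    def reset(self):
        self.pos = (0, 0)
        return self.pos

    def step(self, action):
        dx, dy = ((-1, 0), (1, 0), (0, -1), (0, 1))[action]
        x = min(max(self.pos[0] + dx, 0), self.size - 1)
        y = min(max(self.pos[1] + dy, 0), self.size - 1)
        self.pos = (x, y)
        done = self.pos == self.goal
        reward = 1.0 if done else -0.01
        return self.pos, reward, done, {}  # observation, reward, done, info

    def render(self):
        pass  # the real env draws the maze; no-op here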