import numpy as np
import matplotlib.pyplot as plt

# NOTE: Maze, GMRAgent, and SAVEPATH are defined/imported elsewhere in the
# project; their modules are not shown in this excerpt.


# Variant 1: GMR agent on Maze with per-transition memory writes (PairWriter),
# no rendering; saves learning curves under ./RESULT/GQ/.
def main(argv=()):
    print("hello world")
    # game = make_game(int(argv[1]) if len(argv) > 1 else 0)
    # humanplayer_scrolly(game)
    env = Maze()
    agent = GMRAgent(actions=list(range(env.n_actions)))
    n_trj = 1000
    reward_list = []
    step_r = []
    for eps in range(n_trj):
        observation = env.reset()
        step = 0
        re_vec = []
        r_episode = 0
        while step < 200:
            step += 1
            # env.render()
            # action = agent.random_action(str(observation))
            state = agent.obs2state(observation)
            action = agent.ActAccordingToGM(state)
            observation_, reward, done, info = env.step(action)
            state_ = agent.obs2state(observation_)
            re_vec.append([state, action, reward, state_])
            # agent.MemoryWriter(re_vec)
            agent.PairWriter(state, action, reward, state_)
            r_episode += reward
            step_r.append(reward)
            observation = observation_
            if done:
                print("done!")
                break
        # print("re_vec", re_vec)
        # agent.MemoryWriter(re_vec)
        reward_list.append(r_episode)
        if len(step_r) > 12000:  # cap the total number of environment steps
            break
    # agent.plotGmemory()
    # print('state', state)
    # agent.MemoryReader(state)
    plt.figure(1)
    plt.plot(reward_list)
    # plt.show()
    plt.savefig("./RESULT/GQ/reward_list005.png")
    reward_list = np.array(reward_list)
    np.save('./RESULT/GQ/reward_list005.npy', reward_list)
    # Trailing 290-step average of the per-step reward (zeros during warmup).
    temp_step_r = []
    for i in range(len(step_r)):
        if i < 300:
            temp_step_r.append(0)
        else:
            temp_step_r.append(sum(step_r[(i - 290):i]) / 290)
    # print("temp_step_r", temp_step_r)
    plt.figure(2)
    plt.plot(temp_step_r)
    # plt.show()
    plt.savefig("./RESULT/GQ/step_r005.png")
    temp_step_r = np.array(temp_step_r)
    np.save('./RESULT/GQ/temp_step_r005.npy', temp_step_r)
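

# --- Hedged example (not in the original source): a hypothetical helper for
# reloading and inspecting the arrays saved by the variant above. The file
# names and save path mirror the np.save calls in main(); everything else is
# an illustrative assumption.
def plot_saved_results(path="./RESULT/GQ/"):
    """Reload the saved curves and show them one above the other."""
    rewards = np.load(path + "reward_list005.npy")
    smoothed = np.load(path + "temp_step_r005.npy")
    fig, (ax1, ax2) = plt.subplots(2, 1)
    ax1.plot(rewards)
    ax1.set_ylabel("episode return")
    ax2.plot(smoothed)
    ax2.set_ylabel("smoothed per-step reward")
    ax2.set_xlabel("environment step")
    plt.show()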
# Variant 2: rendered rollouts with whole-episode memory writes (MemoryWriter),
# followed by a graph-memory plot and a memory read-back for the last state.
def main(argv=()):
    print("hello world")
    # game = make_game(int(argv[1]) if len(argv) > 1 else 0)
    # humanplayer_scrolly(game)
    env = Maze()
    agent = GMRAgent(actions=list(range(env.n_actions)))
    n_trj = 1000
    for eps in range(n_trj):
        observation = env.reset()
        step = 0
        re_vec = []
        while step < 1000:
            step += 1
            env.render()
            # action = agent.random_action(str(observation))
            state = agent.obs2state(observation)
            action = agent.ActAccordingToGM(state)
            observation_, reward, done = env.step(action)
            state_ = agent.obs2state(observation_)
            re_vec.append([state, action, reward, state_])
            observation = observation_
            if done:
                print("done!")
                break
        # print("re_vec", re_vec)
        agent.MemoryWriter(re_vec)
    agent.plotGmemory()
    print('state', state)
    agent.MemoryReader(state)
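

# --- Hedged sketch (not in the original source): the interface these variants
# assume of Maze and GMRAgent. The real implementations live elsewhere in the
# project; the method names and call shapes below are taken from the calls
# above, every body is a placeholder stub, and the classes are suffixed
# *Interface so they do not shadow the real ones.
class MazeInterface:
    n_actions = 4  # assumed action count (e.g., up/down/left/right)

    def reset(self):
        """Return the initial observation of a new episode."""

    def step(self, action):
        """Advance one step. The variants above unpack either
        (observation_, reward, done) or (observation_, reward, done, info)."""

    def render(self):
        """Draw the maze."""

    def state_space(self):
        """Return the discrete state space (used in the __main__ block below)."""


class GMRAgentInterface:
    def __init__(self, actions): ...
    def obs2state(self, observation): ...        # raw observation -> discrete state key
    def ActAccordingToGM(self, state): ...       # choose an action from the graph memory
    def PairWriter(self, s, a, r, s_): ...       # store one (s, a, r, s') transition
    def MemoryWriter(self, re_vec): ...          # store a whole episode trajectory
    def MemoryReader(self, state): ...           # query the memory around a state
    def MemoryReconstruction(self, t1, t2): ...  # consolidate memory with thresholds t1, t2
    def plotGmemory(self): ...                   # plot the current graph memory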
# Variant 3: short demo (n_trj = 3) that renders each episode and saves the
# graph memory before and after MemoryReconstruction, then writes Q-variant
# learning curves under ./RESULT/Q/.
def main(argv=()):
    print("hello world")
    # game = make_game(int(argv[1]) if len(argv) > 1 else 0)
    # humanplayer_scrolly(game)
    env = Maze()
    agent = GMRAgent(actions=list(range(env.n_actions)))
    # n_trj = 100
    n_trj = 3
    reward_list = []
    step_r = []
    for eps in range(n_trj):
        observation = env.reset()
        step = 0
        re_vec = []
        r_episode = 0
        while step < 200:
            step += 1
            env.render()
            # action = agent.random_action(str(observation))
            state = agent.obs2state(observation)
            action = agent.ActAccordingToGM(state)
            observation_, reward, done = env.step(action)
            state_ = agent.obs2state(observation_)
            re_vec.append([state, action, reward, state_])
            # agent.MemoryWriter(re_vec)
            agent.PairWriter(state, action, reward, state_)
            r_episode += reward
            step_r.append(reward)
            observation = observation_
            if done:
                print("done!")
                break
        # print("re_vec", re_vec)
        # agent.MemoryWriter(re_vec)
        agent.plotGmemory()
        plt.savefig(SAVEPATH + str(eps) + "original.png")
        reward_list.append(r_episode)
        t1 = 120
        t2 = 100
        agent.MemoryReconstruction(t1, t2)
        agent.plotGmemory()
        plt.savefig(SAVEPATH + str(eps) + "reconstruct.png")
        break  # stop after the first episode
    # reward_list.append(r_episode)  # duplicate of the append inside the loop
    plt.figure(1)
    plt.plot(reward_list)
    # plt.show()
    plt.savefig("./RESULT/Q/reward_list003.png")
    reward_list = np.array(reward_list)
    np.save('./RESULT/Q/reward_list003.npy', reward_list)
    # Raw per-step reward for the first 500 steps, then a trailing 490-step average.
    temp_step_r = []
    for i in range(len(step_r)):
        if i < 500:
            temp_step_r.append(step_r[i])
        else:
            temp_step_r.append(sum(step_r[(i - 490):i]) / 490)
    # print("temp_step_r", temp_step_r)
    plt.figure(2)
    plt.plot(temp_step_r)
    # plt.show()
    plt.savefig("./RESULT/Q/step_r003.png")
    temp_step_r = np.array(temp_step_r)
    np.save('./RESULT/Q/temp_step_r003.npy', temp_step_r)


if __name__ == "__main__":
    # env = gym.make('MontezumaRevengeNoFrameskip-v4')
    # main_MR()
    env = Maze()
    S_space = env.state_space()
    main_MAZE(env)  # NOTE: main_MAZE is not defined in this excerpt
# Variant 4: full experiment (n_trj = 2000) with per-episode
# MemoryReconstruction and no rendering; stops after 12000 environment steps
# and saves the learning curves under SAVEPATH.
def main(argv=()):
    print("hello world")
    # game = make_game(int(argv[1]) if len(argv) > 1 else 0)
    # humanplayer_scrolly(game)
    env = Maze()
    agent = GMRAgent(actions=list(range(env.n_actions)))
    # n_trj = 100
    n_trj = 2000
    reward_list = []
    step_r = []
    for eps in range(n_trj):
        observation = env.reset()
        step = 0
        re_vec = []
        r_episode = 0
        while step < 200:
            step += 1
            # env.render()
            # action = agent.random_action(str(observation))
            state = agent.obs2state(observation)
            action = agent.ActAccordingToGM(state)
            observation_, reward, done, info = env.step(action)
            state_ = agent.obs2state(observation_)
            re_vec.append([state, action, reward, state_])
            # agent.MemoryWriter(re_vec)
            agent.PairWriter(state, action, reward, state_)
            r_episode += reward
            step_r.append(reward)
            observation = observation_
            if done:
                print("done!")
                break
        # print("re_vec", re_vec)
        # agent.MemoryWriter(re_vec)
        # agent.plotGmemory()
        # plt.savefig(SAVEPATH + str(eps) + "original_substract.png")
        reward_list.append(r_episode)
        t1 = 120
        t2 = 100
        agent.MemoryReconstruction(t1, t2)
        # plt.figure(0)
        # agent.plotGmemory()
        # plt.savefig(SAVEPATH + str(eps) + "reconstruct_substract.png", dpi=1080)
        # plt.close(0)
        # plt.figure(3)
        # agent.plotAbastract()
        # plt.savefig(SAVEPATH + str(eps) + "abstract.png", dpi=1080)
        # plt.close(3)
        # plt.figure(4)
        # agent.plotReconPath()
        # plt.savefig(SAVEPATH + str(eps) + "reconpath.png", dpi=1080)
        # plt.close(4)
        if len(step_r) > 12000:  # cap the total number of environment steps
            break
    # agent.plotGmemory()
    plt.figure(1)
    plt.plot(reward_list)
    # plt.show()
    plt.savefig(SAVEPATH + "reward_list005.png")
    reward_list = np.array(reward_list)
    np.save(SAVEPATH + 'reward_list005.npy', reward_list)
    temp_step_r = []
    for i in range(len(step_r)):
        if i < 200:
            if step_r[i] > 100:
                # Purely so the plotted curve stays on the same scale as the
                # other algorithms for comparison; no other meaning.
                temp_step_r.append(0)
            else:
                temp_step_r.append(step_r[i])
        else:
            temp_step_r.append(sum(step_r[(i - 190):i]) / 190)
    # print("temp_step_r", temp_step_r)
    plt.figure(2)
    plt.plot(temp_step_r)
    # plt.show()
    plt.savefig(SAVEPATH + "step_r005.png")
    temp_step_r = np.array(temp_step_r)
    np.save(SAVEPATH + 'temp_step_r005.npy', temp_step_r)
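

# --- Hedged example (not in the original source): the smoothing loops in the
# variants above all compute a trailing moving average with a warmup prefix
# (window/warmup pairs 290/300, 490/500, and 190/200). A hypothetical helper
# that factors out this repeated pattern:
def trailing_average(values, window, warmup, fill=None):
    """Trailing mean of the previous `window` values; for the first `warmup`
    indices emit `fill` if given, else the raw value (both behaviours appear
    in the variants above)."""
    out = []
    for i in range(len(values)):
        if i < warmup:
            out.append(values[i] if fill is None else fill)
        else:
            out.append(sum(values[i - window:i]) / window)
    return out

# Usage matching the GQ variant above:
#   temp_step_r = trailing_average(step_r, window=290, warmup=300, fill=0)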