observation = env.reset()
ep_r = 0
while True:
    env.render()

    action = RL.choose_action(observation)

    observation_, reward, done, info = env.step(action)

    position, velocity = observation_
    reward = abs(position - (-0.5))     # r in [0, 1]

    RL.store_transition(observation, action, reward, observation_)

    ep_r += reward
    if total_steps > 1000:
        RL.learn()

    if done:
        get = '| Get' if observation_[0] >= env.unwrapped.goal_position else '| ----'
        print('Epi: ', i_episode, get,
              '| Ep_r: ', round(ep_r, 4),
              '| Epsilon: ', round(RL.epsilon, 2))
        break

    observation = observation_
    total_steps += 1

RL.plot_cost()
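# Side note on the shaped reward above (an illustration added here, not part of the
# original script), assuming env is gym's MountainCar, which the position/velocity
# observation and env.unwrapped.goal_position suggest: the default reward is -1 per
# step, so reward = abs(position - (-0.5)) swaps in a denser signal that grows as
# the car moves away from the valley bottom at position -0.5. MountainCar positions
# span roughly [-1.2, 0.6], so the shaped reward actually lies in about [0, 1.1].
for position in (-1.2, -0.5, 0.0, 0.6):
    print(position, round(abs(position - (-0.5)), 2))   # -> 0.7, 0.0, 0.5, 1.1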
        # break while loop when end of this episode
        if done:
            print('game over')
            print(env.balance)
            break
        step += 1

    # end of game
    # env.destroy()


if __name__ == "__main__":
    # forex trading environment
    env = FX()
    DQN = DQN(env.n_actions, env.n_features,
              learning_rate=0.01,
              reward_decay=0.9,
              e_greedy=0.9,
              replace_target_iter=200,
              memory_size=2000,
              # output_graph=True
              )
    # env.after(100, run_maze)
    run_maze()
    # env.mainloop()
    DQN.plot_cost()
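# A minimal, self-contained sketch (not the DQN class used above, whose internals
# are not shown here) of what two of the constructor arguments typically control in
# this kind of agent: e_greedy=0.9 means the greedy action is taken with probability
# 0.9 (otherwise a random action is explored), and replace_target_iter=200 means the
# frozen target network is overwritten with the evaluation network's parameters
# every 200 learning steps. All names below are hypothetical.
import numpy as np

rng = np.random.default_rng(0)

def choose_action(q_values, epsilon=0.9):
    # epsilon-greedy: exploit the current Q estimates with probability epsilon,
    # otherwise pick a random action
    if rng.uniform() < epsilon:
        return int(np.argmax(q_values))
    return int(rng.integers(len(q_values)))

print(choose_action(np.array([0.1, 0.7, 0.3])))   # usually 1, occasionally random

replace_target_iter = 200
eval_w = rng.normal(size=4)         # toy evaluation-network weights
target_w = None
for learn_step_counter in range(600):
    if learn_step_counter % replace_target_iter == 0:
        target_w = eval_w.copy()    # sync the target network every 200 learn steps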