ax1.set_title("DQN") ax2.set_title("prio_DQN") ax3.set_title("prio_duel_DQN") plt.tight_layout() plt.savefig('result.jpg') # plt.show() if __name__ == "__main__": DQN = DQNPrioritizedReplay(n_actions=ACTION_SPACE, n_features=N_FEATURES, learning_rate=0.001, e_greedy=0.9, replace_target_iter=REPLACE_TARGET_ITER, memory_size=MEMORY_SIZE, batch_size=BATCH_SIZE, e_greedy_increment=0.00005, prioritized=False, dueling=False) prio_DQN = DQNPrioritizedReplay(n_actions=ACTION_SPACE, n_features=N_FEATURES, learning_rate=0.001, e_greedy=0.9, replace_target_iter=REPLACE_TARGET_ITER, memory_size=MEMORY_SIZE, batch_size=BATCH_SIZE, e_greedy_increment=0.00005, prioritized=True, dueling=False)
import gym
from RL_brain import DQNPrioritizedReplay
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

env = gym.make('MountainCar-v0')
env = env.unwrapped
env.seed(21)

MEMORY_SIZE = 10000

sess = tf.Session()
with tf.variable_scope('natural_DQN'):
    RL_natural = DQNPrioritizedReplay(
        n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, sess=sess, prioritized=False,
    )

with tf.variable_scope('DQN_with_prioritized_replay'):
    RL_prio = DQNPrioritizedReplay(
        n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, sess=sess, prioritized=True, output_graph=True,
    )

sess.run(tf.global_variables_initializer())
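# --- Sketch only (not part of the original script): how the two agents built
# above are typically trained and compared on MountainCar-v0. It assumes a
# train(RL) helper that returns np.vstack((episodes, steps)), as in the
# MountainCar fragment further below; the names his_natural / his_prio are
# illustrative.
his_natural = train(RL_natural)
his_prio = train(RL_prio)

plt.plot(his_natural[0, :], his_natural[1, :] - his_natural[1, 0],
         c='b', label='natural DQN')
plt.plot(his_prio[0, :], his_prio[1, :] - his_prio[1, 0],
         c='r', label='DQN with prioritized replay')
plt.legend(loc='best')
plt.ylabel('total training time (steps)')
plt.xlabel('episode')
plt.grid()
plt.show()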
    entry_point='gym.envs.classic_control:CartPoleEnv',
    tags={'wrapper_config.TimeLimit.max_episode_steps': 10000.0},
    reward_threshold=10000.0,
    kwargs={'change_len': 1.5},
)
env_e = gym.make('CartPole_evaluate-v0')

MEMORY_SIZE = 10000

gpu_options = tf.GPUOptions(allow_growth=True)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

with tf.variable_scope('DQN'):
    RL_natural = DQNPrioritizedReplay(
        n_actions=env.action_space.n,
        n_features=env.observation_space.shape[0],
        memory_size=MEMORY_SIZE,
        e_greedy_increment=0.0001,
        sess=sess,
        prioritized=False,
    )

sess.run(tf.global_variables_initializer())


def train(RL, steps_limit):
    # env.render()
    steps_num = 0
    solved = False
    sumreward = 1
    account = 0
    sess.run(tf.global_variables_initializer())
    RL.initiate_common_par()
            # Task completed, or failed by getting stuck in a local optimum
            if done:
                env.close()
                print("success!")
                break
            if try_action_count > 1000:
                print("failed!")
                break


if __name__ == "__main__":
    RL_prio = DQNPrioritizedReplay(
        n_actions=3, n_features=2, learning_rate=0.01, e_greedy=0.9,
        replace_target_iter=100, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, prioritized=True, dueling=False)
    cost_prio, his_prio = train(RL_prio)

    RL_dueling = DQNPrioritizedReplay(
        n_actions=3, n_features=2, learning_rate=0.01, e_greedy=0.9,
        replace_target_iter=100, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, prioritized=False, dueling=True)
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store(observation, action, reward, observation_)

            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    print('game over')
    env.destroy()


if __name__ == '__main__':
    env = Maze()
    RL = DQNPrioritizedReplay(env.n_actions, env.n_features,
                              learning_rate=0.01,
                              reward_decay=0.9,
                              e_greedy=0.9,
                              replace_target_iter=200,
                              memory_size=2000,
                              # output_graph=True
                              )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
import gym
from RL_brain import DQNPrioritizedReplay
import matplotlib.pyplot as plt
import numpy as np

env = gym.make("MountainCar-v0")
env = env.unwrapped
env.seed(21)

MEMORY_SIZE = 10000

RL_natural = DQNPrioritizedReplay(
    n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
    e_greedy_increment=0.00005, prioritized=False)

RL_prio = DQNPrioritizedReplay(
    n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
    e_greedy_increment=0.00005, prioritized=True)


def train(RL):
    total_steps = 0
    steps = []
    episodes = []
    for i_episode in range(20):
        observation = env.reset()
        while True:
            # print("episode: {} | total_steps: {}".format(i_episode, total_steps))
            # if total_steps - MEMORY_SIZE > 8000: env.render()
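            # --- Sketch only: the remainder of this loop is not in the original
            # fragment. A typical continuation for this natural-vs-prioritized
            # MountainCar comparison, assuming the RL_brain agent exposes
            # choose_action(), store_transition() and learn():
            action = RL.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            if done:
                reward = 10          # bonus reward when the car reaches the goal
            RL.store_transition(observation, action, reward, observation_)
            if total_steps > MEMORY_SIZE:
                RL.learn()           # start learning once the memory is filled
            if done:
                steps.append(total_steps)
                episodes.append(i_episode)
                break
            observation = observation_
            total_steps += 1
    return np.vstack((episodes, steps))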
def test(model, model_path):
    model.load_model(model_path)
    env = Tetris(width=TETRIS_WIDTH, height=TETRIS_HEIGHT, block_size=TETRIS_BLOCK_SIZE)
    env.reset()
    if torch.cuda.is_available():
        model.cuda()

    while True:
        next_steps = env.get_next_states()
        next_actions, next_states = zip(*next_steps.items())
        action, _ = model.choose_action(next_actions, next_states, is_random=False)
        _, done = env.step(action, render=True)
        if done:
            print("Cleared: {}".format(env.cleared_lines))
            break


if __name__ == "__main__":
    prio_duel_DQN = DQNPrioritizedReplay(n_actions=ACTION_SPACE, n_features=N_FEATURES, test=True)
    model_path = "model/dqn_tetris.pkl"
    test(prio_duel_DQN, model_path)
reload(lh)

playerComputer = lh.BasicOpponent()
env = lh.LaserHockeyEnv(mode=0)

MEMORY_SIZE = 100000
Ep_max = 10000
Step_max = 500

sess = tf.Session()
with tf.variable_scope('DQN_with_prioritized_replay'):
    RL_prio = DQNPrioritizedReplay(
        n_actions=8, n_features=16, memory_size=MEMORY_SIZE,
        e_greedy_increment=None, sess=sess, prioritized=True, output_graph=True,
    )

sess.run(tf.global_variables_initializer())


def train(RL):
    global GLOBAL_RUNNING_R
    total_steps = 0
    steps = []
    episodes = []
    cost_his = []
    for i_episode in range(Ep_max):
        ep_reward = 0