def train_double_dqn(env, num_actions):
    """Train a DQLearner backed by an AtariDQN network (wind tunnel run).

    The learner is built with ``epsilon_end=0.01`` and a frame history of 1,
    then handed to ``train`` together with a test epsilon of ``0.001``.
    Results are directed to ``./results/double_dqn/wind_tunnel``.

    ``double`` is not passed to ``DQLearner`` here, so the learner's own
    default applies (presumably double Q-learning, given this function's
    name -- confirm against ``dq_learner.DQLearner``).

    Args:
        env: Environment object; forwarded unchanged to ``train``.
        num_actions: Action count given to both the network and the learner.
    """
    history_len = 1
    network = atari_dqn.AtariDQN(history_len, num_actions)
    learner = dq_learner.DQLearner(
        network,
        num_actions,
        frame_history=history_len,
        epsilon_end=0.01,
    )
    train(learner, env, 0.001, './results/double_dqn/wind_tunnel')
def train_dqn(env, num_actions):
    """Train a DQLearner with ``double=False`` on an AtariDQN network.

    The network is created with ``shared_bias=False`` and a frame history
    of 1. The learner uses ``target_copy_freq=10000`` and
    ``epsilon_end=0.1``; ``train`` receives a test epsilon of ``0.05`` and
    the output directory ``./results/dqn/coin_game``.

    Args:
        env: Environment object; forwarded unchanged to ``train``.
        num_actions: Action count given to both the network and the learner.
    """
    history_len = 1
    network = atari_dqn.AtariDQN(history_len, num_actions, shared_bias=False)

    # Learner settings collected in one place; unpacked as keyword arguments.
    learner_options = {
        'target_copy_freq': 10000,
        'epsilon_end': 0.1,
        'double': False,
        'frame_history': history_len,
    }
    learner = dq_learner.DQLearner(network, num_actions, **learner_options)

    train(learner, env, 0.05, './results/dqn/coin_game')
def train_tabular_dqn(env, num_actions):
    """Train a DQLearner with ``double=False`` on a TabularDQN network.

    The network is ``TabularDQN(3, 1, num_actions, shared_bias=False)``; the
    meaning of the leading ``3`` is defined by ``tabular_dqn.TabularDQN``
    (not visible here). ``train`` receives a test epsilon of ``0.05`` and
    the output directory ``./results/dqn/tab_coin_game_lr0.0025_rp10000``.

    Args:
        env: Environment object; forwarded unchanged to ``train``.
        num_actions: Action count given to both the network and the learner.
    """
    table_n = 3
    history_len = 1
    network = tabular_dqn.TabularDQN(
        table_n, history_len, num_actions, shared_bias=False
    )

    # Learner settings collected in one place; unpacked as keyword arguments.
    # NOTE(review): replay_memory_size is only one larger than
    # replay_start_size -- looks deliberate, but confirm.
    learner_options = {
        'target_copy_freq': 3000,
        'epsilon_end': 0.1,
        'double': False,
        'frame_history': history_len,
        'learning_rate': 0.0025,
        'replay_start_size': 10000,
        'epsilon_steps': 100000.0,
        'replay_memory_size': 10001,
    }
    learner = dq_learner.DQLearner(network, num_actions, **learner_options)

    train(learner, env, 0.05, './results/dqn/tab_coin_game_lr0.0025_rp10000')