def main(env, num_timesteps):
    """Launch DQN training on ``env`` with an RMSprop optimizer.

    Args:
        env: Environment handed to ``dqn_learing``; the stopping test looks
            up its "Monitor" wrapper.
        num_timesteps: Step budget; training is asked to stop once the
            monitor's total step count reaches this value.
    """

    def stopping_criterion(env):
        # The step count is read from the "Monitor" wrapper around the env.
        monitor = get_wrapper_by_name(env, "Monitor")
        return monitor.get_total_steps() >= num_timesteps

    rmsprop_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs={"lr": LEARNING_RATE, "alpha": ALPHA, "eps": EPS},
    )

    # presumably (schedule length in steps, final epsilon) -- confirm against
    # LinearSchedule's signature.
    epsilon_schedule = LinearSchedule(2000000, 0.05)

    dqn_learing(
        env=env,
        q_func=DQN,
        optimizer_spec=rmsprop_spec,
        exploration=epsilon_schedule,
        stopping_criterion=stopping_criterion,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGER_UPDATE_FREQ,
    )
def main(env, num_timesteps, config):
    """Launch training with the ``VIN`` Q-function and an RMSprop optimizer.

    Args:
        env: Environment handed to ``dqn_learing``; the stopping test looks
            up its "Monitor" wrapper.
        num_timesteps: Step budget checked against the monitor's step count.
        config: Passed through unchanged to ``dqn_learing`` as ``config``.
    """

    def stopping_criterion(env):
        # Steps are counted by the Monitor wrapper, i.e. steps of the wrapped
        # env, which differ from the step count of the underlying env.
        monitor = get_wrapper_by_name(env, "Monitor")
        return monitor.get_total_steps() >= num_timesteps

    rmsprop_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs={"lr": LEARNING_RATE, "alpha": ALPHA, "eps": EPS},
    )

    # presumably (schedule length in steps, final epsilon) -- confirm against
    # LinearSchedule's signature.
    epsilon_schedule = LinearSchedule(1000000, 0.1)

    dqn_learing(
        config=config,
        env=env,
        q_func=VIN,
        optimizer_spec=rmsprop_spec,
        exploration=epsilon_schedule,
        stopping_criterion=stopping_criterion,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGER_UPDATE_FREQ,
    )
def main(env, num_timesteps):
    """Launch DQN training for the run selected by ``RUN_INDEX``.

    The exploration schedule and the statistics file name both come from
    ``runs.runs[RUN_INDEX]``.

    Args:
        env: Environment handed to ``dqn_learing``; the stopping test looks
            up its "Monitor" wrapper.
        num_timesteps: Step budget checked against the monitor's step count.
    """

    def stopping_criterion(env):
        # Steps are counted by the Monitor wrapper, i.e. steps of the wrapped
        # env, which differ from the step count of the underlying env.
        monitor = get_wrapper_by_name(env, "Monitor")
        return monitor.get_total_steps() >= num_timesteps

    rmsprop_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs={"lr": LEARNING_RATE, "alpha": ALPHA, "eps": EPS},
    )

    selected_run = runs.runs[RUN_INDEX]
    epsilon_schedule = selected_run.schedule

    # NOTE(review): `task` is not defined inside this function, so it must
    # exist at module level -- confirm. The message reports
    # task.max_timesteps while the stopping test uses num_timesteps.
    banner = "Starting {}; max_timesteps = {}".format(
        selected_run.run_name, task.max_timesteps
    )
    print(banner)

    dqn_learing(
        env=env,
        q_func=DQN,
        optimizer_spec=rmsprop_spec,
        exploration=epsilon_schedule,
        stopping_criterion=stopping_criterion,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGER_UPDATE_FREQ,
        statistics_file_name=selected_run.statistics_file_name,
    )
def main(env, num_timesteps, experiment_config, experiment_name):
    """Launch DQN training with all hyper-parameters read from a config mapping.

    Args:
        env: Environment handed to ``dqn_learing``; the stopping test looks
            up its "Monitor" wrapper.
        num_timesteps: Step budget checked against the monitor's step count.
        experiment_config: Mapping indexed with the keys ``adv_model``,
            ``lr``, ``alpha``, ``eps``, ``min_eps``, ``replay_size``,
            ``batch``, ``gamma``, ``learning_start``, ``learning_freq``,
            ``frame_hist``, ``target_update_freq`` and ``output``.
        experiment_name: Passed through to ``dqn_learing``.
    """
    cfg = experiment_config

    # A truthy 'adv_model' entry selects DQNLRelu; otherwise plain DQN.
    if cfg['adv_model']:
        q_func = DQNLRelu
    else:
        q_func = DQN

    def stopping_criterion(env):
        # Steps are counted by the Monitor wrapper, i.e. steps of the wrapped
        # env, which differ from the step count of the underlying env.
        monitor = get_wrapper_by_name(env, "Monitor")
        return monitor.get_total_steps() >= num_timesteps

    rmsprop_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs={"lr": cfg['lr'], "alpha": cfg['alpha'], "eps": cfg['eps']},
    )

    # presumably (schedule length in steps, final epsilon) -- confirm against
    # LinearSchedule's signature.
    epsilon_schedule = LinearSchedule(1000000, cfg['min_eps'])

    dqn_learing(
        experiment_name=experiment_name,
        env=env,
        q_func=q_func,
        optimizer_spec=rmsprop_spec,
        exploration=epsilon_schedule,
        stopping_criterion=stopping_criterion,
        replay_buffer_size=cfg['replay_size'],
        batch_size=cfg['batch'],
        gamma=cfg['gamma'],
        learning_starts=cfg['learning_start'],
        learning_freq=cfg['learning_freq'],
        frame_history_len=cfg['frame_hist'],
        target_update_freq=cfg['target_update_freq'],
        output_path=cfg['output'],
    )
def main(env):
    """Parse command-line arguments and launch DQN training on ``env``.

    Rebinds the module-level ``args`` to the result of
    ``parser.parse_args()`` and forwards ``args.checkpoint`` to
    ``dqn_learing``. ``stopping_criterion`` is passed explicitly as ``None``.

    Args:
        env: Environment handed to ``dqn_learing``.
    """
    global args
    args = parser.parse_args()

    rmsprop_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs={"lr": LEARNING_RATE, "alpha": ALPHA, "eps": EPS},
    )

    # presumably (schedule length in steps, final epsilon) -- confirm against
    # LinearSchedule's signature.
    epsilon_schedule = LinearSchedule(1000000, 0.1)

    dqn_learing(
        env=env,
        q_func=DQN,
        checkpoint_path=args.checkpoint,
        optimizer_spec=rmsprop_spec,
        exploration=epsilon_schedule,
        stopping_criterion=None,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGET_UPDATE_FREQ,
    )
def main(env, num_timesteps):
    """Launch DQN training with Adam and piecewise lr / exploration schedules.

    Args:
        env: Environment handed to ``dqn_learing``.
        num_timesteps: Step budget; used to derive the schedule breakpoints
            and passed to the module-level ``stopping_criterion`` factory.
    """
    # Only a rough estimate of the number of training iterations.
    num_iterations = float(num_timesteps) / 4.0

    # Learning-rate schedule: every value is scaled by one shared multiplier.
    lr_multiplier = 1.0
    early_lr = 1e-4 * lr_multiplier
    late_lr = 5e-5 * lr_multiplier
    lr_breakpoints = [
        (0, early_lr),
        (num_iterations / 10, early_lr),
        (num_iterations / 2, late_lr),
    ]
    lr_schedule = PiecewiseSchedule(lr_breakpoints, outside_value=late_lr)

    adam_spec = OptimizerSpec(
        constructor=optim.Adam,
        kwargs={"eps": 1e-4},
        lr_schedule=lr_schedule,
    )

    # Exploration schedule breakpoints, with 0.01 used outside them.
    epsilon_breakpoints = [
        (0, 1.0),
        (1e6, 0.1),
        (num_iterations / 2, 0.01),
    ]
    epsilon_schedule = PiecewiseSchedule(epsilon_breakpoints, outside_value=0.01)

    dqn_learing(
        env=env,
        q_func=DQN,
        optimizer_spec=adam_spec,
        exploration=epsilon_schedule,
        stopping_criterion=stopping_criterion(num_timesteps),
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGER_UPDATE_FREQ,
        grad_norm_clipping=GRAD_NORM_CLIPPING,
    )
def main(env):
    """Launch DQN training on ``env`` with two action-count arguments.

    ``num_actions1`` and ``num_actions2`` are not defined in this function;
    they are read from the enclosing (module) scope and forwarded to
    ``dqn_learing``. No ``stopping_criterion`` is passed, so termination is
    left to whatever ``dqn_learing`` does by default -- TODO confirm.

    Args:
        env: Environment handed to ``dqn_learing``.
    """
    rmsprop_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs={"lr": LEARNING_RATE, "alpha": ALPHA, "eps": EPS},
    )

    # presumably (schedule length in steps, final epsilon) -- confirm against
    # LinearSchedule's signature.
    epsilon_schedule = LinearSchedule(1000000, 0.1)

    dqn_learing(
        env=env,
        q_func=DQN,
        optimizer_spec=rmsprop_spec,
        exploration=epsilon_schedule,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGER_UPDATE_FREQ,
        num_actions1=num_actions1,
        num_actions2=num_actions2,
    )
def main(env, num_timesteps):
    """Train DQN under two hyper-parameter settings and plot both reward curves.

    Two runs are executed back to back on ``benchmark.tasks[3]``, each on a
    fresh environment built with the same random seed:

    * ``"lr=0.001, gamma=0.999"`` -- RMSprop lr 0.001, discount 0.999
    * ``"Default"`` -- ``LEARNING_RATE`` and ``GAMMA``

    The ``"mean_episode_rewards"`` series returned by each run is plotted and
    the figure is saved to ``Final-Performance.png``.

    Args:
        env: Ignored. Kept for interface compatibility; every run builds its
            own environment with ``get_env(task, seed)``.
        num_timesteps: Step budget; a run is asked to stop once its env's
            "Monitor" wrapper reports at least this many total steps.
    """
    # Change the index to select a different game.
    task = benchmark.tasks[3]

    # One random seed, shared by both runs.
    seed = random.randint(0, 100)

    def stopping_criterion(env):
        # Steps are counted by the Monitor wrapper, i.e. steps of the wrapped
        # env, which differ from the step count of the underlying env.
        monitor = get_wrapper_by_name(env, "Monitor")
        return monitor.get_total_steps() >= num_timesteps

    # The same schedule object is handed to both runs.
    exploration_schedule = LinearSchedule(1000000, 0.1)

    # (result/plot label, learning rate, gamma, feature_tested tag), in run order.
    experiments = [
        ("lr=0.001, gamma=0.999", 0.001, 0.999, "lr=0.001, gamma=0.999"),
        ("Default", LEARNING_RATE, GAMMA, ""),
    ]

    # Results of every run, keyed by label.
    stats = {}
    for label, learning_rate, gamma, feature_tested in experiments:
        optimizer_spec = OptimizerSpec(
            constructor=optim.RMSprop,
            kwargs=dict(lr=learning_rate, alpha=ALPHA, eps=EPS),
        )
        # Each run gets a freshly constructed environment.
        run_env = get_env(task, seed)
        stats[label] = dqn_learing(
            env=run_env,
            q_func=DQN,
            optimizer_spec=optimizer_spec,
            exploration=exploration_schedule,
            stopping_criterion=stopping_criterion,
            replay_buffer_size=REPLAY_BUFFER_SIZE,
            batch_size=BATCH_SIZE,
            gamma=gamma,
            learning_starts=LEARNING_STARTS,
            learning_freq=LEARNING_FREQ,
            frame_history_len=FRAME_HISTORY_LEN,
            target_update_freq=TARGER_UPDATE_FREQ,
            feature_tested=feature_tested,
        )

    # Plot only after all runs have finished, one curve per run.
    plt.clf()
    plt.xlabel('Timesteps')
    plt.ylabel('Mean Reward (past 100 episodes)')
    for label, _, _, _ in experiments:
        mean_rewards = stats[label]["mean_episode_rewards"]
        plt.plot(range(len(mean_rewards)), mean_rewards, label=label)
    plt.legend()
    plt.title("Performance")
    plt.savefig('Final-Performance.png')