def test_descr(self):
    """Check the commands a RunDescription generates for a small parameter grid.

    Two experiments share the same 2 x 3 grid of (p1, p2) values.  Every
    generated entry must carry the experiment flags, must have a name that
    contains the last component of its root directory, and must have a root
    directory that starts with the run name.
    """
    params = ParamGrid([('p1', [3.14, 2.71]), ('p2', ['a', 'b', 'c'])])
    experiments = [
        Experiment('test1', 'python super_rl1.py', params.generate_params(randomize=False)),
        Experiment('test2', 'python super_rl2.py', params.generate_params(randomize=False)),
    ]
    rd = RunDescription('test_run', experiments)
    cmds = rd.generate_experiments()

    # env_vars is part of each generated tuple but is not checked here.
    for command, name, root_dir, _env_vars in cmds:
        exp_name = split(root_dir)[-1]
        self.assertIn('--experiment', command)
        self.assertIn('--experiments_root', command)
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(exp_name, name)
        self.assertTrue(root_dir.startswith('test_run'))
def test_simple_cmd(self):
    """Run two small grids of `echo` commands through the runner, then clean up.

    Logging at INFO level and below is disabled for the duration of the run
    and is always re-enabled afterwards, even if the run raises.  The run
    directory is removed on success (as before) and on failure (best effort),
    so a failing run does not leave files behind for later tests.
    """
    logging.disable(logging.INFO)
    try:
        echo_params = ParamGrid([
            ('p1', [3.14, 2.71]),
            ('p2', ['a', 'b', 'c']),
            ('p3', list(np.arange(3))),
        ])
        experiments = [
            Experiment('test_echo1', 'echo', echo_params.generate_params(randomize=True)),
            Experiment('test_echo2', 'echo', echo_params.generate_params(randomize=False)),
        ]

        train_dir = ensure_dir_exists(join(project_tmp_dir(), 'tests'))
        root_dir_name = '__test_run__'
        run_dir = join(train_dir, root_dir_name)

        rd = RunDescription(root_dir_name, experiments, train_dir)

        args = runner_argparser().parse_args([])
        args.max_parallel = 8
        args.pause_between = 0

        try:
            run(rd, args)
        except BaseException:
            # Best-effort cleanup; the original error is what matters, so it is re-raised.
            shutil.rmtree(run_dir, ignore_errors=True)
            raise
    finally:
        # Always restore logging so a failure here cannot silence later tests.
        logging.disable(logging.NOTSET)

    # Success path: strict removal, exactly as the original test did.
    shutil.rmtree(run_dir)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: APPO on the basic Doom scenarios, frameskip 4.

# Parameters varied per run: ten seeds and six Doom environments.
_params = ParamGrid([
    ('seed', [0, 1111, 2222, 3333, 4444, 5555, 6666, 7777, 8888, 9999]),
    ('env', [
        'doom_my_way_home',
        'doom_deadly_corridor',
        'doom_defend_the_center',
        'doom_defend_the_line',
        'doom_health_gathering',
        'doom_health_gathering_supreme',
    ]),
])

# Base command line shared by every run (adjacent literals are concatenated).
_cli = (
    'python -m algorithms.appo.train_appo'
    ' --train_for_env_steps=500000000 --algo=APPO --env_frameskip=4 --use_rnn=True'
    ' --num_workers=36 --num_envs_per_worker=8 --num_policies=1'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    ' --wide_aspect_ratio=False'
)

_experiments = [
    Experiment('basic_envs_fs4', _cli, _params.generate_params(randomize=False)),
]

RUN_DESCRIPTION = RunDescription('paper_doom_basic_envs_appo_v97_fs4', experiments=_experiments)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: APPO with four policies on doom_battle_d4, frameskip 4.

# Single-point grid: only one parameter combination is listed.
_params = ParamGrid([
    ('ppo_epochs', [1]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m algorithms.appo.train_appo --env=doom_battle_d4'
    ' --train_for_env_steps=50000000000 --algo=APPO --env_frameskip=4 --use_rnn=True --reward_scale=0.5'
    ' --num_workers=24 --num_envs_per_worker=30 --num_policies=4'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048'
    ' --wide_aspect_ratio=False --pbt_period_env_steps=5000000'
)

_experiments = [
    Experiment('battle_d4_fs4_pbt', _cli, _params.generate_params(randomize=False)),
]

RUN_DESCRIPTION = RunDescription(
    'doom_battle_d4_appo_v64_fs4_pbt',
    experiments=_experiments,
    use_gpus=2,
    experiments_per_gpu=-1,
    max_parallel=1,
)
'python -m algorithms.appo.train_appo --env=doom_dwango5_bots_experimental --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --reward_scale=0.5 --num_workers=18 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=True', _params.generate_params(randomize=False), dict(DOOM_DEFAULT_UDP_PORT=35300), ), Experiment( 'bots_128_fs2_narrow', 'python -m algorithms.appo.train_appo --env=doom_dwango5_bots_experimental --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --reward_scale=0.5 --num_workers=18 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=False', _params.generate_params(randomize=False), dict(DOOM_DEFAULT_UDP_PORT=40300), ), Experiment( 'bots_128_fs2_wide_adam0.5', 'python -m algorithms.appo.train_appo --env=doom_dwango5_bots_experimental --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --reward_scale=0.5 --num_workers=18 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=True --adam_beta1=0.5', _params.generate_params(randomize=False), dict(DOOM_DEFAULT_UDP_PORT=45300), ), Experiment( 'bots_128_fs2_narrow_adam0.5', 'python -m algorithms.appo.train_appo --env=doom_dwango5_bots_experimental --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --reward_scale=0.5 --num_workers=18 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=False --adam_beta1=0.5', _params.generate_params(randomize=False), 
dict(DOOM_DEFAULT_UDP_PORT=50300), ), ] RUN_DESCRIPTION = RunDescription('doom_bots_v60_sweep', experiments=_experiments, pause_between_experiments=120, use_gpus=4, experiments_per_gpu=1, max_parallel=4)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: 8 quadrotors, ep_rand_bezier mode, with collision reward.

# Only the seed is varied.
_params = ParamGrid([
    ('seed', [0, 1111, 2222, 3333]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m run_algorithm --env=quadrotor_multi --train_for_env_steps=2000000000 --algo=APPO'
    ' --use_rnn=False --num_workers=36 --num_envs_per_worker=4'
    ' --learning_rate=0.0001 --ppo_clip_value=5.0 --recurrence=1 --nonlinearity=tanh'
    ' --actor_critic_share_weights=False --policy_initialization=xavier_uniform'
    ' --adaptive_stddev=False --hidden_size=256 --with_vtrace=False'
    ' --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=5.0'
    ' --exploration_loss_coeff=0.0 --rollout=128 --batch_size=1024'
    ' --extend_obs=True --quads_use_numba=True --quads_num_agents=8'
    ' --quads_episode_duration=15.0 --quads_mode=ep_rand_bezier'
    ' --quads_dist_between_goals=0.0 --quads_collision_reward=1.0'
    ' --encoder_custom=quad_multi_encoder --with_pbt=False'
)

_experiment = Experiment(
    '8_bezier_col_penalties',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_multi_bezier_v112', experiments=[_experiment])

# Troubleshooting (Brain server): if a run with num_workers = 72 fails with
# "Resource temporarily unavailable", try exporting these variables first:
#   export OMP_NUM_THREADS=1
#   export USE_SIMPLE_THREADED_LEVEL3=1
#
# NOTE(review): the --run value below is kept from the original note and looks
# like it was copied from another script; confirm it matches this module's name.
#
# Launch on the server:
#   xvfb-run python -m runner.run --run=quad_multi_static_goal_3d_sphere_vel --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
#
# Launch on a local machine (first change num_workers to the number of physical cores):
#   python -m runner.run --run=quad_multi_static_goal_3d_sphere_vel --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: 6 quadrotors sharing one static goal, collision reward 0.1.

# Only the seed is varied.
_params = ParamGrid([
    ('seed', [0, 1111, 2222, 3333]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m run_algorithm --env=quadrotor_multi --train_for_env_steps=1000000000 --algo=APPO'
    ' --use_rnn=False --num_workers=72 --num_envs_per_worker=4'
    ' --learning_rate=0.0001 --adam_eps=1e-8 --ppo_clip_value=5.0 --recurrence=1 --nonlinearity=tanh'
    ' --actor_critic_share_weights=False --policy_initialization=xavier_uniform'
    ' --adaptive_stddev=False --hidden_size=64 --with_vtrace=False'
    ' --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=0.0'
    ' --exploration_loss_coeff=0.0 --rollout=128 --batch_size=1024'
    ' --extend_obs=True --quads_use_numba=True --quads_num_agents=6'
    ' --quads_episode_duration=7.0 --quads_mode=static_same_goal'
    ' --quads_formation_size=0.0 --quads_collision_reward=0.1'
    ' --encoder_custom=quad_multi_encoder'
)

_experiment = Experiment(
    'one_static_goal-mean_embedding-collision_0.1-agents_6',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_multi_same_goal_v112', experiments=[_experiment])

# Troubleshooting (Brain server): if a run with num_workers = 72 fails with
# "Resource temporarily unavailable", try exporting these variables first:
#   export OMP_NUM_THREADS=1
#   export USE_SIMPLE_THREADED_LEVEL3=1
#
# Launch on the server:
#   xvfb-run python -m runner.run --run=quad_multi_static_goal --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
#
# Launch on a local machine (first change num_workers to the number of physical cores):
#   python -m runner.run --run=quad_multi_static_goal --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: 8 quadrotors, static different goals in a sphere formation,
# sweeping the settle range and the out-of-range velocity reward.

_params = ParamGrid([
    ('seed', [0, 1111, 2222]),
    ('quads_settle_range_meters', [0.5, 1.0, 1.5]),
    ('quads_vel_reward_out_range', [1.0, 2.0, 5.0]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m run_algorithm --env=quadrotor_multi --train_for_env_steps=2000000000 --algo=APPO'
    ' --use_rnn=False --num_workers=36 --num_envs_per_worker=4'
    ' --learning_rate=0.0001 --ppo_clip_value=5.0 --recurrence=1 --nonlinearity=tanh'
    ' --actor_critic_share_weights=False --policy_initialization=xavier_uniform'
    ' --adaptive_stddev=False --hidden_size=256 --with_vtrace=False'
    ' --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=5.0'
    ' --exploration_loss_coeff=0.0 --rollout=128 --batch_size=1024'
    ' --extend_obs=True --quads_use_numba=True --quads_num_agents=8'
    ' --quads_episode_duration=15.0 --quads_mode=static_diff_goal'
    ' --quads_formation=sphere --quads_formation_size=0.25'
    ' --quads_collision_reward=1.0 --quads_settle=True'
    ' --encoder_custom=quad_multi_encoder --with_pbt=False'
    ' --experiment=quads_multi_same_goal_gridsearch_v112'
)

_experiment = Experiment(
    '8_static_goal-3d_sphere',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_multi_static_goal_grid_v112', experiments=[_experiment])

# Troubleshooting (Brain server): if a run with num_workers = 72 fails with
# "Resource temporarily unavailable", try exporting these variables first:
#   export OMP_NUM_THREADS=1
#   export USE_SIMPLE_THREADED_LEVEL3=1
#
# Launch on the server:
#   xvfb-run python -m runner.run --run=quad_multi_static_goal_3d_sphere_vel --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
#
# Launch on a local machine (first change num_workers to the number of physical cores):
#   python -m runner.run --run=quad_multi_static_goal_3d_sphere_vel --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
ACTOR_GPUS = '0'  # replace with '0 1 2 3 4 5 6 7' for 8-GPU server
NUM_POLICIES = 1

# Command line for the non-voxel benchmark environments.
# TIMEOUT_SECONDS and NUM_WORKERS are module constants defined earlier in this file.
_basic_cli = (
    f'python -m algorithms.appo.train_appo --train_for_seconds={TIMEOUT_SECONDS}'
    f' --train_for_env_steps=20000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2'
    f' --num_workers={NUM_WORKERS} --num_envs_per_worker=16'
    f' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    f' --num_policies={NUM_POLICIES} --with_pbt=False --max_grad_norm=0.0'
    f' --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001'
    f' --policy_workers_per_policy=2 --learner_main_loop_num_cores=4 --reward_clip=30'
)

_params_basic_envs = ParamGrid([
    ('env', ['doom_benchmark', 'atari_breakout', 'dmlab_benchmark']),
])

_experiment_basic_envs = Experiment(
    'benchmark_basic_envs',
    _basic_cli,
    _params_basic_envs.generate_params(randomize=False),
)

# Command line for the voxel environment; it additionally passes the actor GPUs
# and the voxel-specific flags.
_voxel_cli = (
    f'python -m algorithms.appo.train_appo --train_for_seconds={TIMEOUT_SECONDS}'
    f' --train_for_env_steps=20000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2'
    f' --num_workers={NUM_WORKERS_VOXEL_ENV} --num_envs_per_worker=2'
    f' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    f' --actor_worker_gpus {ACTOR_GPUS} --num_policies={NUM_POLICIES} --with_pbt=False --max_grad_norm=0.0'
    f' --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001'
    f' --voxel_num_simulation_threads=2 --voxel_use_vulkan=True'
    f' --policy_workers_per_policy=2 --learner_main_loop_num_cores=4 --reward_clip=30'
    f' --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1'
    f' --pbt_mix_policies_in_one_env=False'
)

_params_voxel_env = ParamGrid([
    ('env', ['voxel_env_obstacleshard']),
    ('voxel_use_vulkan', [True, False]),
])

_experiment_voxel_env = Experiment(
    'benchmark_voxel_env',
    _voxel_cli,
    _params_voxel_env.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'voxel_train_benchmark',
    experiments=[_experiment_basic_envs, _experiment_voxel_env],
)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: APPO with PBT enabled (8 policies) on quadrotor_single.

# Single-point grid: only one parameter combination is listed.
_params = ParamGrid([
    ('ppo_epochs', [1]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m algorithms.appo.train_appo --env=quadrotor_single --train_for_seconds=3600000'
    ' --algo=APPO --gamma=0.99 --use_rnn=False'
    ' --num_workers=72 --num_envs_per_worker=4 --num_policies=8'
    ' --ppo_epochs=1 --rollout=128 --recurrence=1 --batch_size=512 --benchmark=False'
    ' --pbt_replace_reward_gap=0.1 --pbt_replace_reward_gap_absolute=200.0'
    ' --pbt_period_env_steps=1000000 --pbt_start_mutation=20000000 --with_pbt=True'
    ' --adam_eps=1e-8 --nonlinearity=tanh --actor_critic_share_weights=False'
    ' --policy_initialization=xavier_uniform --adaptive_stddev=False --hidden_size=64'
    ' --with_vtrace=False --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=0.0'
    ' --ppo_clip_value=5.0 --entropy_loss_coeff=0.00001 --learning_rate=5e-4'
)

_experiment = Experiment(
    'quads_pbt',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_single_pbt_v96_v2', experiments=[_experiment])
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: APPO on doom_battle, frameskip 4, four seeds.

_params = ParamGrid([
    ('seed', [1111, 2222, 3333, 4444]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m algorithms.appo.train_appo --env=doom_battle'
    ' --train_for_env_steps=4000000000 --algo=APPO --env_frameskip=4 --use_rnn=True'
    ' --num_workers=72 --num_envs_per_worker=8 --num_policies=1'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    ' --wide_aspect_ratio=False --max_grad_norm=0.0'
)

_experiments = [
    Experiment('battle_fs4', _cli, _params.generate_params(randomize=False)),
]

RUN_DESCRIPTION = RunDescription('paper_doom_battle_appo_v108_fs4', experiments=_experiments)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: quadrotor_single with fixed hyperparameters and eight seeds.

# Every entry except 'seed' lists a single value, so only the seed varies.
_params = ParamGrid([
    ('seed', [0, 1111, 2222, 3333, 4444, 5555, 6666, 7777]),
    ('batch_size', [256]),
    ('ppo_epochs', [1]),
    ('nonlinearity', ['tanh']),
    ('learning_rate', [1e-4]),
    ('entropy_loss_coeff', [0.0005]),
    ('actor_critic_share_weights', ['False']),
    ('policy_initialization', ['xavier_uniform']),
    ('max_policy_lag', [50]),
    ('adaptive_stddev', ['False']),
    ('initial_stddev', [1.0]),
    ('hidden_size', [64]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m run_algorithm --env=quadrotor_single --train_for_env_steps=1000000000'
    ' --algo=APPO --gamma=0.99 --use_rnn=False'
    ' --num_workers=36 --num_envs_per_worker=2 --num_policies=1'
    ' --rollout=32 --recurrence=32 --benchmark=False --with_pbt=False'
    ' --ppo_clip_ratio=0.05'
)

_experiment = Experiment(
    'quads_gridsearch',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_single_gridsearch_v89_seeds', experiments=[_experiment])
from runner.runs.quad_multi_mix_baseline import QUAD_BASELINE_CLI

# Run description: mixed-scenario quadrotor baseline with replay buffer,
# sweeping the neighbor encoder type and the collision annealing steps.

# Two entries list two values each; the rest are fixed to a single value.
_params = ParamGrid([
    ('quads_collision_falloff_radius', [4.0]),
    ('quads_collision_reward', [5.0]),
    ('quads_collision_smooth_max_penalty', [10.0]),
    ('quads_neighbor_encoder_type', ['attention', 'mean_embed']),
    ('replay_buffer_sample_prob', [0.75]),
    ('anneal_collision_steps', [0, 200000000]),
])

# The command line is the shared baseline CLI imported above, unmodified.
_experiment = Experiment(
    'quad_mix_baseline-8_mixed_replay',
    QUAD_BASELINE_CLI,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'quads_multi_mix_baseline_8a_replay_v2_v115',
    experiments=[_experiment],
)

# Troubleshooting (Brain server): if a run with num_workers = 72 fails with
# "Resource temporarily unavailable", try exporting these variables first:
#   export OMP_NUM_THREADS=1
#   export USE_SIMPLE_THREADED_LEVEL3=1
#
# NOTE(review): the --run value below is kept from the original note and names
# the baseline module this script imports from; confirm it matches this module's name.
#
# Launch on the server:
#   xvfb-run python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
#
# Launch on a local machine (first change num_workers to the number of physical cores):
#   python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
('quads_obstacle_num', [1]), ('quads_obstacle_type', ['sphere']), ('quads_obstacle_traj', ['mix']), ('quads_collision_obstacle_reward', [5.0]), ('quads_obstacle_obs_mode', ['absolute']), ('quads_collision_obst_smooth_max_penalty', [10.0]), ('quads_obstacle_hidden_size', [256]), ('replay_buffer_sample_prob', [0.0]), ('quads_obst_penalty_fall_off', [10.0]), ]) _experiment = Experiment( 'quad_mix_baseline_obst_mix-8a', QUAD_BASELINE_CLI, _params.generate_params(randomize=False), ) RUN_DESCRIPTION = RunDescription('quads_multi_mix_obst_mix_8a_v115', experiments=[_experiment]) # On Brain server, when you use num_workers = 72, if the system reports: Resource temporarily unavailable, # then, try to use two commands below # export OMP_NUM_THREADS=1 # export USE_SIMPLE_THREADED_LEVEL3=1 # Command to use this script on server: # xvfb-run python -m runner.run --run=quad_multi_mix_baseline_obstacle_mix --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4 # Command to use this script on local machine: # Please change num_workers to the physical cores of your local machine # python -m runner.run --run=quad_multi_mix_baseline_obstacle_mix --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: sampling-only benchmark (DUMMY_SAMPLER) over the voxel environments.

NUM_WORKERS_VOXEL_ENV = 48  # typically num logical cores / 2, limited by the num of available Vulkan contexts
TIMEOUT_SECONDS = 180
# Space-separated GPU indices forwarded to --sampler_worker_gpus; the current
# value lists eight GPUs (0-7).
SAMPLER_GPUS = '0 1 2 3 4 5 6 7'

# Command line built from the constants above (adjacent literals are concatenated).
_voxel_env_cli = (
    f'python -m run_algorithm --algo=DUMMY_SAMPLER'
    f' --num_workers={NUM_WORKERS_VOXEL_ENV} --num_envs_per_worker=1'
    f' --experiment=benchmark --sampler_worker_gpus {SAMPLER_GPUS}'
    f' --voxel_num_envs_per_instance=64 --voxel_num_agents_per_env=2'
    f' --voxel_num_simulation_threads=2 --timeout_seconds={TIMEOUT_SECONDS}'
)

# Eight voxel environments, all with Vulkan enabled.
_params_voxel_env = ParamGrid([
    ('env', [
        'voxel_env_TowerBuilding',
        'voxel_env_ObstaclesEasy',
        'voxel_env_ObstaclesHard',
        'voxel_env_Collect',
        'voxel_env_Sokoban',
        'voxel_env_HexMemory',
        'voxel_env_HexExplore',
        'voxel_env_Rearrange',
    ]),
    ('voxel_use_vulkan', [True]),
])

_experiment_voxel_env = Experiment(
    'benchmark_voxel_env_8',
    _voxel_env_cli,
    _params_voxel_env.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('voxel_bench_sampling_all_envs', experiments=[_experiment_voxel_env])
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: APPO on doom_freedm, frameskip 2.

# Single-point grid: only one parameter combination is listed.
_params = ParamGrid([
    ('ppo_epochs', [1]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m algorithms.appo.train_appo --env=doom_freedm --train_for_seconds=360000'
    ' --algo=APPO --gamma=0.995 --env_frameskip=2 --use_rnn=True --reward_scale=0.5'
    ' --num_workers=20 --num_envs_per_worker=4 --num_policies=1'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048'
    ' --benchmark=False --start_bot_difficulty=150'
)

_experiment = Experiment(
    'bots_freedm_fs2',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'doom_freedm_v64_fs2',
    experiments=[_experiment],
    pause_between_experiments=100,
    use_gpus=1,
    experiments_per_gpu=-1,
    max_parallel=1,
)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: 8 quadrotors, static different goals in a sphere formation,
# sweeping the collision reward.

_params = ParamGrid([
    ('seed', [1111, 3333]),
    ('quads_collision_reward', [0.0, 1.0, 2.0, 5.0]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m run_algorithm --env=quadrotor_multi --train_for_env_steps=2000000000 --algo=APPO'
    ' --use_rnn=False --num_workers=36 --num_envs_per_worker=4'
    ' --learning_rate=0.0001 --ppo_clip_value=5.0 --recurrence=1 --nonlinearity=tanh'
    ' --actor_critic_share_weights=False --policy_initialization=xavier_uniform'
    ' --adaptive_stddev=False --hidden_size=256 --with_vtrace=False'
    ' --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=5.0'
    ' --exploration_loss_coeff=0.0 --rollout=128 --batch_size=1024'
    ' --extend_obs=True --quads_use_numba=True --quads_num_agents=8'
    ' --quads_episode_duration=15.0 --quads_mode=static_diff_goal'
    ' --quads_formation_size=0.25 --encoder_custom=quad_multi_encoder'
    ' --quads_formation=sphere --with_pbt=False'
)

_experiment = Experiment(
    '8_static_goal_sphere',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'quads_multi_static_goal_sphere_8_col_rew_v112',
    experiments=[_experiment],
)

# Troubleshooting (Brain server): if a run with num_workers = 72 fails with
# "Resource temporarily unavailable", try exporting these variables first:
#   export OMP_NUM_THREADS=1
#   export USE_SIMPLE_THREADED_LEVEL3=1
#
# Launch on the server:
#   xvfb-run python -m runner.run --run=quad_multi_static_goal_3d_sphere --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
#
# Launch on a local machine (first change num_workers to the number of physical cores):
#   python -m runner.run --run=quad_multi_static_goal_3d_sphere --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: APPO (PBT disabled) on four voxel environments.

# Only 'env' lists more than one value.
_params = ParamGrid([
    ('env', [
        'voxel_env_rearrange',
        'voxel_env_collect',
        'voxel_env_obstaclesEasy',
        'voxel_env_hexMemory',
    ]),
    ('voxel_num_simulation_threads', [1]),
    ('rnn_num_layers', [2]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m algorithms.appo.train_appo --train_for_seconds=360000000'
    ' --algo=APPO --gamma=0.997 --use_rnn=True'
    ' --num_workers=12 --num_envs_per_worker=2'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    ' --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0'
    ' --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001'
    ' --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1'
    ' --voxel_num_simulation_threads=1 --voxel_use_vulkan=True'
    ' --policy_workers_per_policy=2 --learner_main_loop_num_cores=4'
)

_experiment = Experiment(
    'voxel_env_pbt',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('voxel_env_v114_env_v52', experiments=[_experiment])
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: policy-lag study on doom_battle (frameskip 4).  The three
# experiments differ only in the number of workers and environments per worker;
# the suffix of each experiment name is num_workers * num_envs_per_worker.

# Single-point grid: only one parameter combination is listed.
_params = ParamGrid([
    ('ppo_epochs', [1]),
])


def _battle_cli(num_workers, num_envs_per_worker):
    """Return the training command line for the given worker/env counts.

    Building all three commands from one template guarantees they share the
    same launcher prefix.  The original third command was missing
    'python -m ' and would have tried to execute the module path directly.
    """
    return (
        'python -m algorithms.appo.train_appo --env=doom_battle'
        ' --train_for_env_steps=1000000000 --algo=APPO --env_frameskip=4 --use_rnn=True'
        f' --num_workers={num_workers} --num_envs_per_worker={num_envs_per_worker}'
        ' --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32'
        ' --macro_batch=2048 --batch_size=2048 --wide_aspect_ratio=False'
    )


_experiments = [
    Experiment(
        'battle_fs4_100',
        _battle_cli(num_workers=10, num_envs_per_worker=10),
        _params.generate_params(randomize=False),
    ),
    Experiment(
        'battle_fs4_400',
        _battle_cli(num_workers=20, num_envs_per_worker=20),
        _params.generate_params(randomize=False),
    ),
    Experiment(
        'battle_fs4_800',
        _battle_cli(num_workers=20, num_envs_per_worker=40),
        _params.generate_params(randomize=False),
    ),
]

RUN_DESCRIPTION = RunDescription('paper_policy_lag_v66_fs4', experiments=_experiments)
from runner.runs.quad_multi_mix_baseline import QUAD_BASELINE_CLI

# Run description: ablations of the mixed-scenario quadrotor baseline.  Each
# experiment appends one overriding flag to the shared baseline command line.

_params = ParamGrid([
    ('seed', [0, 1111, 2222, 3333]),
])

# Ablation 1: replay buffer sampling probability set to zero.
_no_replay_cli = QUAD_BASELINE_CLI + ' --replay_buffer_sample_prob=0.00'
_experiment_no_replay = Experiment(
    'quad_mix_baseline-8_mixed_noreplay',
    _no_replay_cli,
    _params.generate_params(randomize=False),
)

# Ablation 2: collision annealing steps set to zero.
_no_anneal_cli = QUAD_BASELINE_CLI + ' --anneal_collision_steps=0'
_experiment_no_anneal = Experiment(
    'quad_mix_baseline-8_mixed_noannealing',
    _no_anneal_cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'paper_quads_multi_mix_baseline_8a_ablation_v115',
    experiments=[_experiment_no_replay, _experiment_no_anneal],
)

# Troubleshooting (Brain server): if a run with num_workers = 72 fails with
# "Resource temporarily unavailable", try exporting these variables first:
#   export OMP_NUM_THREADS=1
#   export USE_SIMPLE_THREADED_LEVEL3=1
#
# NOTE(review): the --run value below is kept from the original note and names
# the baseline module this script imports from; confirm it matches this module's name.
#
# Launch on the server:
#   xvfb-run python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
#
# Launch on a local machine (first change num_workers to the number of physical cores):
#   python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
# Run description: multi-quadrotor sanity check (4 agents, no collision reward).

# Only the seed lists more than one value.
_params = ParamGrid([
    ('seed', [0, 1111, 2222, 3333]),
    ('quads_num_agents', [4]),
    ('quads_episode_duration', [7.0]),
    ('quads_mode', ['sanity_check']),
    ('quads_dist_between_goals', [0.0]),
    ('quads_collision_reward', [0.0]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m run_algorithm --env=quadrotor_multi --train_for_env_steps=1000000000 --algo=APPO'
    ' --use_rnn=False --num_workers=72 --num_envs_per_worker=4'
    ' --learning_rate=0.0001 --adam_eps=1e-8 --ppo_clip_value=5.0 --recurrence=1 --nonlinearity=tanh'
    ' --actor_critic_share_weights=False --policy_initialization=xavier_uniform'
    ' --adaptive_stddev=False --hidden_size=64 --with_vtrace=False'
    ' --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=0.0'
    ' --exploration_loss_coeff=0.0 --rollout=128 --batch_size=512'
    ' --extend_obs=True --quads_use_numba=True'
)

_experiment = Experiment(
    'quads_multi_numba_extend_obs',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_multi_sanity_check_v112', experiments=[_experiment])

# Note: before running this script, it is recommended to read the
# "sanity_check" page of the spreadsheet first.
#
# Troubleshooting (Brain server): if a run with num_workers = 72 fails with
# "Resource temporarily unavailable", try exporting these variables first:
#   export OMP_NUM_THREADS=1
#   export USE_SIMPLE_THREADED_LEVEL3=1
#
# Launch on the server:
#   xvfb-run python -m runner.run --run=quad_multi_sanity_check --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
#
# Launch on a local machine (first change num_workers to the number of physical cores):
#   python -m runner.run --run=quad_multi_sanity_check --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: PPO with observation memory on three MiniGrid environments.

_params = ParamGrid([
    ('env', [
        'MiniGrid-MemoryS7-v0',
        'MiniGrid-RedBlueDoors-8x8-v0',
        'MiniGrid-MemoryS17Random-v0',
    ]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m train_pytorch --algo=PPO --rollout=64 --num_envs=96'
    ' --recurrence=1 --use_rnn=False --train_for_env_steps=200000000'
    ' --prior_loss_coeff=0.005 --obs_mem=True'
)

_experiment = Experiment(
    'mem_minigrid_obs',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'mem_minigrid_obs_v26',
    experiments=[_experiment],
    pause_between_experiments=5,
    use_gpus=2,
    experiments_per_gpu=4,
    max_parallel=12,
)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: doom_health_gathering_supreme with two observation
# normalization settings (defaults vs. mean 128 / scale 128), five seeds each.

_params = ParamGrid([
    ('seed', [1111, 2222, 3333, 4444, 5555]),
    ('env', ['doom_health_gathering_supreme']),
])

# Command line shared by both experiments (adjacent literals are concatenated).
_base_cli = (
    'python -m algorithms.appo.train_appo --train_for_env_steps=40000000'
    ' --algo=APPO --env_frameskip=4 --use_rnn=True'
    ' --num_workers=20 --num_envs_per_worker=12 --num_policies=1'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    ' --wide_aspect_ratio=False'
)

_experiments = [
    # Base command only, with no normalization flags.
    Experiment(
        'health_0_255',
        _base_cli,
        _params.generate_params(randomize=False),
    ),
    # Same command plus explicit observation mean/scale flags.
    Experiment(
        'health_128_128',
        _base_cli + ' --obs_subtract_mean=128.0 --obs_scale=128.0',
        _params.generate_params(randomize=False),
    ),
]

RUN_DESCRIPTION = RunDescription('doom_health_gathering_v97_fs4', experiments=_experiments)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: single-agent voxel multitask (Obstacles) training, five seeds.

# Only the seed lists more than one value.
_params = ParamGrid([
    ('env', ['voxel_env_multitask_Obstacles']),
    ('use_cpc', ['True']),
    ('seed', [11111, 22222, 33333, 44444, 55555]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m algorithms.appo.train_appo --train_for_seconds=360000000'
    ' --train_for_env_steps=10000000000 --algo=APPO --gamma=0.997'
    ' --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    ' --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0'
    ' --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001'
    ' --voxel_num_simulation_threads=1 --voxel_use_vulkan=True'
    ' --policy_workers_per_policy=2 --learner_main_loop_num_cores=2'
    ' --reward_clip=30 --pbt_mix_policies_in_one_env=False'
)

# Flags appended for the one-agent-per-environment setup.
_one_agent_flags = ' --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1'

EXPERIMENT_1AGENT = Experiment(
    'voxel_env_multitask_obs',
    _cli + _one_agent_flags,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'voxel_env_v115_multitask_obstacles_v55',
    experiments=[EXPERIMENT_1AGENT],
)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: APPO on gym_LunarLanderContinuous-v2, five seeds.

_params_earlystop = ParamGrid([
    ('seed', [0, 1111, 2222, 3333, 4444]),
])

# Base command line (adjacent literals are concatenated).
# NOTE(review): the command already contains '--seed 0' while 'seed' is also a
# grid parameter; confirm which one takes effect in the final command.
_cli = (
    'python -m examples.train_gym_env --train_for_env_steps=500000000 --algo=APPO'
    ' --num_workers=20 --num_envs_per_worker=6 --seed 0 --gae_lambda 0.99'
    ' --experiment=lunar_lander_2 --env=gym_LunarLanderContinuous-v2'
    ' --exploration_loss_coeff=0.0 --max_grad_norm=0.0'
    ' --encoder_type=mlp --encoder_subtype=mlp_mujoco --encoder_extra_fc_layers=0'
    ' --hidden_size=128 --policy_initialization=xavier_uniform'
    ' --actor_critic_share_weights=False --adaptive_stddev=False'
    ' --recurrence=1 --use_rnn=False --batch_size=256 --ppo_epochs=4'
    ' --with_vtrace=False --reward_scale=0.05 --max_policy_lag=100000'
    ' --save_every_sec=15 --experiment_summaries_interval=10'
)

_experiment_earlystop = Experiment(
    'lunar_lander_cont',
    _cli,
    _params_earlystop.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('lunar_lander_cont_v100', experiments=[_experiment_earlystop])
from runner.run_description import RunDescription
from runner.runs.voxel_base_experiments import EXPERIMENT_1AGENT

# Run description: the single-agent voxel experiment imported from
# voxel_base_experiments, launched under its own run name.
_experiments = [EXPERIMENT_1AGENT]

RUN_DESCRIPTION = RunDescription('voxel_env_v115_single_v55', experiments=_experiments)
('ppo_clip_value', [10]), ('with_vtrace', ['False']), ('learning_rate', [0.0001]), ('max_grad_norm', [100.0]), ('use_rnn', ['False']), ('recurrence', [1]), ('num_minibatches_to_accumulate', [0]), ('device', ['gpu']), ('actor_critic_share_weights', ['False']), ('max_policy_lag', [1000000]), ('adaptive_stddev', ['False']), ('ppo_epochs', [20]), ('ppo_clip_ratio', [0.3]), ('batch_size', [1024]), ('num_batches_per_iteration', [10]), ('rollout', [128]), ('nonlinearity', ['tanh']), ('exploration_loss_coeff', [0.0]), ]) _experiment = Experiment( 'mujoco_hopper', 'python -m run_algorithm --env=mujoco_hopper --train_for_env_steps=7000000 --algo=APPO --num_workers=16 --num_envs_per_worker=4 --benchmark=False --with_pbt=False', _params.generate_params(randomize=False), ) RUN_DESCRIPTION = RunDescription('mujoco_hopper_v94', experiments=[_experiment]) # python -m runner.run --run=mujoco_halfcheetah_grid_search --runner=processes --max_parallel=8 --pause_between=1 --experiments_per_gpu=10000 --num_gpus=1
from runner.run_description import RunDescription
from runner.runs.voxel_base_experiments import EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS

# Run description: the two- and four-agent voxel experiments imported from
# voxel_base_experiments, launched together under one run name.
_experiments = [EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS]

RUN_DESCRIPTION = RunDescription('voxel_env_v115_multi_agent_v55', experiments=_experiments)
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: APPO with PBT (8 policies) on voxel_env_v23_v, with actor
# workers spread over GPUs 0-7.

# Single-point grid: only one parameter combination is listed.
_params = ParamGrid([
    ('ppo_epochs', [1]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m algorithms.appo.train_appo --env=voxel_env_v23_v --train_for_seconds=360000000'
    ' --algo=APPO --gamma=0.997 --use_rnn=True'
    ' --num_workers=28 --num_envs_per_worker=2'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    ' --actor_worker_gpus 0 1 2 3 4 5 6 7 --num_policies=8 --with_pbt=True'
    ' --max_grad_norm=0.0 --pbt_replace_reward_gap_absolute=0.3'
    ' --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001'
    ' --pbt_mix_policies_in_one_env=False --experiment=voxel_env_v23_v'
    ' --voxel_num_envs_per_instance=48 --voxel_num_agents_per_env=4'
    ' --voxel_num_simulation_threads=4 --voxel_vertical_look_limit=0.2'
    ' --voxel_use_vulkan=True --policy_workers_per_policy=2'
    ' --learner_main_loop_num_cores=4'
)

_experiment = Experiment(
    'voxel_env_pbt',
    _cli,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('voxel_env_pbt_v112_env_v23_8p', experiments=[_experiment])
from runner.run_description import RunDescription, Experiment, ParamGrid

# Run description: quadrotor_single search over learning rate and PPO epochs
# with hidden size 256, running on CPU.

# 'learning_rate' and 'ppo_epochs' list several values; the rest are fixed.
# NOTE(review): 'batch_size' appears both here (140) and in the command line
# (128); confirm which one takes effect in the final command.
_params_earlystop = ParamGrid([
    ('batch_size', [140]),
    ('learning_rate', [0.001, 0.0004, 0.0002, 0.0001, 0.00005]),
    ('ppo_epochs', [4, 8]),
    ('num_batches_per_iteration', [50]),
    ('exploration_loss_coeff', [0.0]),
    ('quads_effort_reward', [0.05]),
])

# Base command line (adjacent literals are concatenated).
_cli = (
    'python -m run_algorithm --env=quadrotor_single --train_for_env_steps=1000000000'
    ' --algo=APPO --gamma=0.99 --use_rnn=False'
    ' --num_workers=24 --num_envs_per_worker=2 --num_policies=1'
    ' --rollout=700 --recurrence=1 --benchmark=False --with_pbt=False'
    ' --ppo_clip_ratio=0.05 --batch_size=128 --nonlinearity=tanh'
    ' --actor_critic_share_weights=False --policy_initialization=xavier_uniform'
    ' --adaptive_stddev=False --hidden_size=256 --with_vtrace=False'
    ' --max_policy_lag=100000000 --gae_lambda=1.00 --device=cpu'
    ' --max_grad_norm=0.0 --num_minibatches_to_accumulate=0'
)

_experiment_earlystop = Experiment(
    'quads_gridsearch_earlystop',
    _cli,
    _params_earlystop.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_single_gridsearch_v93_h256', experiments=[_experiment_earlystop])