示例#1
0
    def test_simple_cmd(self):
        """Push one pair of `echo` experiments through `run` under two run descriptions.

        The first description uses the default `RunDescription` settings; the
        second turns off `experiment_dirs_sf_format` and overrides the
        experiment / directory argument names. The test makes no explicit
        assertions: it passes when both `run` calls complete and the run
        directory can be removed afterwards.
        """
        # Suppress log records at INFO level and below while the test runs.
        logging.disable(logging.INFO)
        try:
            echo_params = ParamGrid([
                ('p1', [3.14, 2.71]),
                ('p2', ['a', 'b', 'c']),
                ('p3', list(np.arange(3))),
            ])
            experiments = [
                Experiment('test_echo1', 'echo', echo_params.generate_params(randomize=True)),
                Experiment('test_echo2', 'echo', echo_params.generate_params(randomize=False)),
            ]
            train_dir = ensure_dir_exists(join(project_tmp_dir(), 'tests'))
            root_dir_name = '__test_run__'
            rd = RunDescription(root_dir_name, experiments)

            # Start from the runner's default arguments, then override what the test needs.
            args = runner_argparser().parse_args([])
            args.max_parallel = 8
            args.pause_between = 0
            args.train_dir = train_dir

            succeeded = False
            try:
                run(rd, args)

                rd2 = RunDescription(root_dir_name, experiments, experiment_dirs_sf_format=False, experiment_arg_name='--experiment_tst', experiment_dir_arg_name='--dir')
                run(rd2, args)
                succeeded = True
            finally:
                # Always remove the run directory. After a failure the removal is
                # best-effort so that it cannot mask the original exception; after
                # success any removal error still propagates, as it did before.
                shutil.rmtree(join(train_dir, root_dir_name), ignore_errors=not succeeded)
        finally:
            # Re-enable logging even when the test body raised, so later tests
            # are not silently muted.
            logging.disable(logging.NOTSET)
示例#2
0
 def test_experiment(self):
     """Check the (command, name) pairs produced by `Experiment.generate_experiments`.

     Every generated command must begin with the base command, and every
     generated name must begin with `0<index>_test`, where `<index>` is the
     position of the pair in the generated sequence.
     """
     cmd = 'python super_rl.py'
     grid = ParamGrid([
         ('p1', [3.14, 2.71]),
         ('p2', ['a', 'b', 'c']),
     ])
     experiment = Experiment('test', cmd, grid.generate_params(randomize=False))
     generated = experiment.generate_experiments(
         'train_dir',
         customize_experiment_name=True,
         param_prefix='--',
     )
     for index, (command, name) in enumerate(generated):
         # The base command is expected to come first in the command line.
         self.assertTrue(command.startswith(cmd))
         # Names are expected to lead with a zero-prefixed running index.
         self.assertTrue(name.startswith(f'0{index}_test'))
示例#3
0
 def test_descr(self):
     """Check the tuples produced by `RunDescription.generate_experiments`.

     For every generated `(command, name, root_dir, env_vars)` tuple the test
     verifies that the command carries both the `--experiment` and
     `--experiments_root` arguments, that the last path component of
     `root_dir` appears in the experiment name, and that `root_dir` starts
     with the run name.
     """
     params = ParamGrid([('p1', [3.14, 2.71]), ('p2', ['a', 'b', 'c'])])
     experiments = [
         Experiment('test1', 'python super_rl1.py', params.generate_params(randomize=False)),
         Experiment('test2', 'python super_rl2.py', params.generate_params(randomize=False)),
     ]
     rd = RunDescription('test_run', experiments)
     cmds = rd.generate_experiments('train_dir')
     for command, name, root_dir, _env_vars in cmds:
         exp_name = split(root_dir)[-1]
         # '--experiment' is a substring of '--experiments_root', so a plain
         # substring check could never fail on its own. Require the flag to be
         # followed by '=' or whitespace so it is matched as a separate argument.
         # NOTE(review): assumes the flag and its value are joined by '=' or a space — confirm.
         self.assertRegex(command, r'--experiment[=\s]')
         self.assertIn('--experiments_root', command)
         # assertIn reports both operands on failure, unlike assertTrue(a in b).
         self.assertIn(exp_name, name)
         self.assertTrue(root_dir.startswith('test_run'))
示例#4
0
    ('ppo_clip_value', [10]),
    ('with_vtrace', ['False']),
    ('learning_rate', [0.0001]),
    ('max_grad_norm', [100.0]),
    ('use_rnn', ['False']),
    ('recurrence', [1]),
    ('num_minibatches_to_accumulate', [0]),
    ('device', ['gpu']),
    ('actor_critic_share_weights', ['False']),
    ('max_policy_lag', [1000000]),
    ('adaptive_stddev', ['False']),
    ('ppo_epochs', [20]),
    ('ppo_clip_ratio', [0.3]),
    ('batch_size', [1024]),
    ('num_batches_per_iteration', [10]),
    ('rollout', [128]),
    ('nonlinearity', ['tanh']),
    ('exploration_loss_coeff', [0.0]),
])

# Single Hopper experiment. The base command is written as adjacent string
# literals (joined by the parser into one string) so each flag sits on its own line.
_experiment = Experiment(
    'mujoco_hopper',
    (
        'python -m sample_factory.run_algorithm'
        ' --env=mujoco_hopper'
        ' --train_for_env_steps=7000000'
        ' --algo=APPO'
        ' --num_workers=16'
        ' --num_envs_per_worker=4'
        ' --benchmark=False'
        ' --with_pbt=False'
    ),
    _params.generate_params(randomize=False),
)

# Module-level run description wrapping the single experiment above.
RUN_DESCRIPTION = RunDescription(
    'mujoco_hopper_v94',
    experiments=[_experiment],
)

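# Example launch command. NOTE(review): --run below names the halfcheetah grid search, not this hopper run description — confirm before use.
# python -m runner.run --run=mujoco_halfcheetah_grid_search --runner=processes --max_parallel=8  --pause_between=1 --experiments_per_gpu=10000 --num_gpus=1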
示例#5
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Parameter grid: two values each for `use_rnn` and `mem_size`.
_params = ParamGrid([
    ('use_rnn', [True, False]),
    ('mem_size', [4, 0]),
])

# Shared base command. Adjacent literals are joined into a single string; the
# trailing space after the last flag is deliberate so that per-experiment
# flags can be appended directly.
cmd = (
    'python -m train_pytorch'
    ' --algo=PPO'
    ' --rollout=64'
    ' --recurrence=32'
    ' --num_envs=96'
    ' --num_workers=96'
    ' --train_for_env_steps=1000000000'
    ' --normalize_advantage=False'
    ' --prior_loss_coeff=0.005 '
)

# IMPORTANT: for DMLAB the number of workers should equal the number of envs
# (both are 96 in the base command); otherwise spurious crashes may occur!
_experiment_nm = Experiment(
    'mem_dmlab_nm',
    f'{cmd}--reward_scale=0.1 --env=dmlab_nonmatch',
    _params.generate_params(randomize=False),
)
_experiment_wm = Experiment(
    'mem_dmlab_wm',
    f'{cmd}--reward_scale=1.0 --env=dmlab_watermaze',
    _params.generate_params(randomize=False),
)

# Run description bundling the two experiments.
RUN_DESCRIPTION = RunDescription(
    'mem_dmlab_v24',
    experiments=[_experiment_nm, _experiment_wm],
)
示例#6
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Parameter grid: five seeds on a single environment.
_params = ParamGrid([
    ('seed', [1111, 2222, 3333, 4444, 5555]),
    ('env', ['doom_health_gathering_supreme']),
])

# Both experiments share one base command; the second variant only appends
# the observation mean-subtraction and scaling flags.
_experiments = [
    Experiment(
        name,
        (
            'python -m sample_factory.algorithms.appo.train_appo'
            ' --train_for_env_steps=40000000'
            ' --algo=APPO'
            ' --env_frameskip=4'
            ' --use_rnn=True'
            ' --num_workers=20'
            ' --num_envs_per_worker=12'
            ' --num_policies=1'
            ' --ppo_epochs=1'
            ' --rollout=32'
            ' --recurrence=32'
            ' --batch_size=2048'
            ' --wide_aspect_ratio=False'
        ) + extra_args,
        # A fresh generate_params() call per experiment, as in the original layout.
        _params.generate_params(randomize=False),
    )
    for name, extra_args in (
        ('health_0_255', ''),
        ('health_128_128', ' --obs_subtract_mean=128.0 --obs_scale=128.0'),
    )
]

# Run description bundling both experiments.
RUN_DESCRIPTION = RunDescription(
    'doom_health_gathering_v97_fs4',
    experiments=_experiments,
)
示例#7
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Parameter grid: five seeds.
_params_earlystop = ParamGrid([
    ('seed', [0, 1111, 2222, 3333, 4444]),
])

# NOTE(review): the base command already hard-codes `--seed 0` and
# `--experiment=lunar_lander_2`, while `seed` is also a grid parameter —
# presumably the runner appends per-run values; confirm which one wins.
_experiment_earlystop = Experiment(
    'lunar_lander_cont',
    (
        'python -m sample_factory_examples.train_gym_env'
        ' --train_for_env_steps=500000000'
        ' --algo=APPO'
        ' --num_workers=20'
        ' --num_envs_per_worker=6'
        ' --seed 0'
        ' --gae_lambda 0.99'
        ' --experiment=lunar_lander_2'
        ' --env=gym_LunarLanderContinuous-v2'
        ' --exploration_loss_coeff=0.0'
        ' --max_grad_norm=0.0'
        ' --encoder_type=mlp'
        ' --encoder_subtype=mlp_mujoco'
        ' --encoder_extra_fc_layers=0'
        ' --hidden_size=128'
        ' --policy_initialization=xavier_uniform'
        ' --actor_critic_share_weights=False'
        ' --adaptive_stddev=False'
        ' --recurrence=1'
        ' --use_rnn=False'
        ' --batch_size=256'
        ' --ppo_epochs=4'
        ' --with_vtrace=False'
        ' --reward_scale=0.05'
        ' --max_policy_lag=100000'
        ' --save_every_sec=15'
        ' --experiment_summaries_interval=10'
    ),
    _params_earlystop.generate_params(randomize=False),
)

# Run description wrapping the single experiment above.
RUN_DESCRIPTION = RunDescription(
    'lunar_lander_cont_v100',
    experiments=[_experiment_earlystop],
)