def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None):
    """Configure and run async R2D1 training on the voxel TowerBuilding env.

    Args:
        game: environment/game name written into both train and eval configs.
        run_ID: run index passed to the logger context.
        cuda_idx: NOTE(review): accepted but never used in this body; GPU
            assignment comes from the affinity code instead — confirm intent.
    """
    # Either manually set the resources for the experiment:
    affinity_code = encode_affinity(
        n_cpu_core=2,
        n_gpu=1,
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        run_slot=0,
        cpu_per_run=1,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(affinity_code)
    # Start from the stock R2D1 config and point both the train and eval
    # environments at the requested game.
    config = configs["r2d1"]
    config["env"]["game"] = game
    config["eval_env"]["game"] = config["env"]["game"]
    sampler = AsyncGpuSampler(
        EnvCls=voxel_make,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    # NOTE(review): this rebinding discards the full hyperparameter config;
    # only the game name gets logged by logger_context — confirm intended.
    config = dict(game=game)
    name = "r2d1_" + game
    log_dir = "tower_building"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
def run_task(vv, log_dir, exp_name):
    """Run one SAC training job on a SoftGym environment.

    Args:
        vv: variant dict; must contain 'seed', 'config_key', 'env_name',
            and 'env_kwargs' (mutated in place below).
        log_dir: directory for logger output.
        exp_name: experiment name — NOTE(review): unused in this body,
            presumably consumed by the surrounding launcher; confirm.
    """
    vv = update_env_kwargs(vv)
    run_ID = vv['seed']  # the seed doubles as the run index for the logger
    config_key = vv['config_key']
    slot_affinity_code = encode_affinity(
        n_cpu_core=20,
        n_gpu=2,
        n_socket=2,
        run_slot=0,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    # Merge the whole variant dict into the config: variant keys shadow
    # the config defaults.
    config.update(**vv)
    # config["env"] = env_arg_dict[config['env_name']]
    vv['env_kwargs']['headless'] = True  # disable rendering during training
    # Resolve the SAC algorithm/agent classes dynamically from module names
    # given in the config (allows swapping sac_v2 variants via config alone).
    sac_module = 'rlpyt.algos.qpg.{}'.format(config['sac_module'])
    sac_agent_module = 'rlpyt.agents.qpg.{}'.format(config['sac_agent_module'])
    sac_module = importlib.import_module(sac_module)
    sac_agent_module = importlib.import_module(sac_agent_module)
    SAC = sac_module.SAC
    SacAgent = sac_agent_module.SacAgent
    # Pixel observations need the named-tuple observation classes registered
    # before the sampler starts its workers.
    if 'pixel_wrapper_kwargs' in config['env']:
        info_keys = config.get('info_keys', None)
        state_keys = config.get('state_keys', None)
        init_namedtuples(info_keys=info_keys, state_keys=state_keys)
    sampler = CpuSampler(
        EnvCls=SOFTGYM_ENVS[vv['env_name']],
        env_kwargs=vv['env_kwargs'],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=vv['env_kwargs'],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_{}".format(vv['env_name'])
    with logger_context(log_dir, run_ID, name, log_params=config,
                        snapshot_mode='last'):
        runner.train()
def build_and_train(game="pong", run_ID=0):
    """Build and run multi-GPU synchronous A2C on an Atari game.

    The affinity code is computed in two explicit steps (encode, then
    prepend the run slot) instead of going through a launcher script.

    Args:
        game: Atari game name passed to AtariEnv.
        run_ID: integer run index used by the logger context.
    """
    # Seems like we should be able to skip the intermediate step of the code,
    # but so far have just always run that way.
    # Change these inputs to match local machine and desired parallelism.
    affinity_code = encode_affinity(
        # FIX: keyword was `n_cpu_cores` (plural); encode_affinity takes
        # `n_cpu_core` (singular, as used by the other launchers in this
        # repo), so the old spelling raised a TypeError at startup.
        n_cpu_core=16,  # Use 16 cores across all experiments.
        n_gpu=8,  # Use 8 gpus across all experiments.
        hyperthread_offset=24,  # If machine has 24 cores.
        n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        gpu_per_run=2,  # How many GPUs to parallelize one run across.
        # cpu_per_run=1,
    )
    slot_affinity_code = prepend_run_slot(run_slot=0, affinity_code=affinity_code)
    affinity = get_affinity(slot_affinity_code)
    # FIX: removed a stray `breakpoint()` left over from debugging; it would
    # halt every launch at an interactive pdb prompt.
    sampler = GpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=dict(game=game),
        CollectorCls=WaitResetCollector,
        batch_T=5,
        batch_B=16,
        max_decorrelation_steps=400,
    )
    algo = A2C()  # Run with defaults.
    agent = AtariFfAgent()
    runner = MultiGpuRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    # Only the game name is recorded as the logged config (example style).
    config = dict(game=game)
    name = "a2c_" + game
    log_dir = "example_7"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel
import pathlib

# Resolve the training script relative to this launcher's location.
_train_dir = pathlib.Path(__file__).resolve().parent.parent / 'train'
script = (_train_dir / "mujoco_ff_ppoc_gpu.py").as_posix()
# script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_gpu.py"

# Six runs share a single GPU through separate contexts, with alternating
# sampling.
affinity_code = encode_affinity(
    n_cpu_core=6,
    n_gpu=1,
    contexts_per_gpu=6,
    n_socket=1,
    alternating=True,
)
runs_per_setting = 6
default_config_key = "ppoc_1M_halfcheetahtransfer"
experiment_title = "PPOC_Transfer"

# No variant levels: one base configuration, repeated runs only.
variant_levels = list()
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key, ),
)
import multiprocessing import GPUtil from rlpyt.utils.launching.affinity import encode_affinity, quick_affinity_code from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel # script to launch hyperparameter search num_cpus = multiprocessing.cpu_count() num_gpus = len(GPUtil.getAvailable()) affinity_code = encode_affinity( n_cpu_core=num_cpus, n_gpu=num_gpus, set_affinity=True, # it can help to restrict workers to individual CPUs ) runs_per_setting = 1 experiment_title = "parkour_challenge" variants = [ { 'algo': 'ppo', 'ppo_kwargs': dict(minibatches=4), 'sampler_kwargs': dict(batch_B=32) }, { 'algo': 'ppo', 'ppo_kwargs': dict(minibatches=32), 'sampler_kwargs': dict(batch_B=32)
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" affinity_code = encode_affinity( n_cpu_core=24, n_gpu=4, hyperthread_offset=24, n_socket=2, ) runs_per_setting = 2 experiment_title = "atari_r2d1_long" variant_levels = list() games = ["gravitar", "asteroids"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "r2d1_long" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting, variants=variants,
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" affinity_code = encode_affinity( n_cpu_core=20, n_gpu=4, async_sample=True, gpu_per_run=1, sample_gpu_per_run=2, # hyperthread_offset=24, # optim_sample_share_gpu=True, n_socket=1, # Force this. alternating=True, ) runs_per_setting = 1 experiment_title = "atari_r2d1_async_alt" variant_levels = list() games = ["seaquest"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "async_alt_got" run_experiments(
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import VariantLevel, make_variants script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_sac_async_gpu.py" affinity_code = encode_affinity( n_cpu_core=16, n_gpu=4, # contexts_per_gpu=2, async_sample=True, sample_gpu_per_run=1, # hyperthread_offset=2, # n_socket=1, # cpu_per_run=1, ) runs_per_setting = 2 default_config_key = "async_gpu" experiment_title = "sac_mujoco_async" variant_levels = list() env_ids = ["Hopper-v3", "HalfCheetah-v3"] # , "Swimmer-v3"] values = list(zip(env_ids)) dir_names = ["env_{}".format(*v) for v in values] keys = [("env", "id")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) run_experiments( script=script, affinity_code=affinity_code,
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu_multi.py" affinity_code = encode_affinity( n_cpu_core=8, n_gpu=2, hyperthread_offset=8, n_socket=1, contexts_per_run=2, # cpu_per_run=2, ) runs_per_setting = 1 experiment_title = "atari_ff_a2c_multi" variant_levels = list() games = ["pong"] # , "seaquest", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "0" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title,
import pathlib
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Launcher settings for Isaac Gym PPO/PPOC experiments. (The variant
# assembly and run_experiments call continue beyond this chunk.)

PHYSX_N_THREADS = 4  # By default, isaac environments use 4 cores by themselves. Each sampler will use its own as well
affinity_code = encode_affinity(
    n_cpu_core=6,
    n_gpu=1,
    contexts_per_gpu=2,  # This thing chews GPU memory
    n_socket=1,
    alternating=False
)
runs_per_setting = 3  # 3 runs
# Paths: PPO and PPOC train scripts resolved relative to this launcher.
path_ppo = (pathlib.Path(__file__).resolve().parent.parent / 'train' / "isaac_ff_ppo_gpu.py").as_posix()
path_ppoc = (pathlib.Path(__file__).resolve().parent.parent / 'train' / "isaac_ff_ppoc_gpu.py").as_posix()
# Default keys
default_key = 'nv_ant'
default_oc_key = 'nv_ant_oc'
# Param options: each list is zipped into 1-tuples, the shape that
# VariantLevel expects for its `values`.
PPO_LRS = list(zip([1e-4, 3e-4, 1e-3]))
OC_DELIB = list(zip([0., 0.01, 1.]))
OC_SIZES = list(zip([2, 4]))
tasks = list(zip(['Ant']))
# Variant keys: paths into the nested config dict.
lr_key = [("algo", "learning_rate")]
delib_key = [("algo", "delib_cost")]
oc_size_key = [("model", "option_size")]
interest_key = [("model", "use_interest")]
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Launch settings for Atari LSTM A2C (CPU-sampler train script).
script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_cpu.py"
# default_config_key = "0"
affinity_code = encode_affinity(
    # FIX: keyword was `n_cpu_cores` (plural); encode_affinity takes
    # `n_cpu_core` (singular, as used by the other launchers in this repo),
    # so the old spelling raised a TypeError at startup.
    n_cpu_core=6,
    n_gpu=2,
    hyperthread_offset=8,
    n_socket=1,
    # cpu_per_run=4,
)
runs_per_setting = 2
experiment_title = "lstm_test_gpu"
variant_levels = list()
# Earlier sweep (learning rate x entropy coeff), kept for reference:
# learning_rate = [7e-4] * 4
# entropy_loss_coeff = [0.01, 0.02, 0.04, 0.08]
# values = list(zip(learning_rate, entropy_loss_coeff))
# dir_names = ["test_{}lr_{}ent".format(*v) for v in values]
# keys = [("algo", "learning_rate"), ("algo", "entropy_loss_coeff")]
# variant_levels.append(VariantLevel(keys, values, dir_names))
# Current sweep: batch_T at a fixed learning rate.
learning_rate = [7e-4] * 2
batch_T = [5, 20]
values = list(zip(learning_rate, batch_T))
dir_names = ["test_{}lr_{}T".format(*v) for v in values]
keys = [("algo", "learning_rate"), ("sampler", "batch_T")]
from ul_gen.agents.discrete_sac_ae_agent import DiscreteSacAEAgent import argparse parser = argparse.ArgumentParser() parser.add_argument("--savepath", type=str, default="./ae_data/") args = parser.parse_args() os.makedirs(args.savepath, exist_ok=True) EmptyAgentInfo = namedarraytuple("EmptyAgentInfo", []) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") affinity_code = encode_affinity( n_cpu_core=4, n_gpu=1, n_socket=1, ) affinity = affinity_from_code(prepend_run_slot(0, affinity_code)) # Get Params config = configs["discrete_sac_ae"] # Setup the data collection pipeline # Edit the sampler kwargs to get a larger batch size config["sampler"]["batch_T"] = 24 config["sampler"]["batch_B"] = 16 sampler = GpuSampler(EnvCls=gym.make, env_kwargs=config["env"], CollectorCls=GpuResetCollector,
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import VariantLevel, make_variants script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py" affinity_code = encode_affinity( n_cpu_core=24, n_gpu=6, # hyperthread_offset=24, n_socket=2, # cpu_per_run=2, ) runs_per_setting = 2 experiment_title = "atari_dqn_basic_cpu" variant_levels = list() games = ["pong", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "dqn" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting,
with many different inputs to encode, and see what comes out. The results will be logged with a folder structure according to the variant levels constructed here. """ from rlpyt.utils.launching.affinity import encode_affinity, quick_affinity_code from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import VariantLevel, make_variants # Either manually set the resources for the experiment: affinity_code = encode_affinity( n_cpu_core=2, n_gpu=0, # hyperthread_offset=8, # if auto-detect doesn't work, number of CPU cores # n_socket=1, # if auto-detect doesn't work, can force (or force to 1) cpu_per_run=1, set_affinity=True, # it can help to restrict workers to individual CPUs ) # Or try an automatic one, but results may vary: # affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True) runs_per_setting = 2 experiment_title = "example_6" variant_levels = list() # Within a variant level, list each combination explicitly. learning_rate = [7e-4, 1e-3] batch_B = [16, 32] values = list(zip(learning_rate, batch_B)) dir_names = ["example6_{}lr_{}B".format(*v) for v in values]
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ppo_serial.py" affinity_code = encode_affinity( n_cpu_core=4, n_gpu=4, # contexts_per_gpu=1, # hyperthread_offset=24, # n_socket=2, # cpu_per_run=2, ) runs_per_setting = 4 default_config_key = "ppo_1M_serial" experiment_title = "ppo_mujoco_v3_serial_hc_tl" # variant_levels_1M = list() variant_levels_3M = list() # n_steps = [1e6] # values = list(zip(n_steps)) # dir_names = ["1M"] # keys = [("runner", "n_steps")] # variant_levels_1M.append(VariantLevel(keys, values, dir_names)) bootstrap_tls = [True] values = list(zip(bootstrap_tls)) dir_names = ["bootstrap_timelimit"] keys = [("algo", "bootstrap_timelimit")] variant_levels_3M.append(VariantLevel(keys, values, dir_names))
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel affinity_code = encode_affinity( n_cpu_cores=2, n_gpu=0, hyperthread_offset=2, n_socket=1, cpu_per_run=1, ) runs_per_setting = 2 variant_levels = list() env_ids = ["Hopper-v2"] # , "Swimmer-v3"] values = list(zip(env_ids)) dir_names = ["env_{}".format(*v) for v in values] keys = [("env", "id")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "ddpg_from_td3_1M_serial" script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_serial.py" experiment_title = "ddpg_mujoco" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting,
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Launcher for pixel-based SAC on the dm_control cloth_v8 domain.
script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py"
affinity_code = encode_affinity(
    n_cpu_core=20,
    n_gpu=4,
    contexts_per_gpu=1,
    n_socket=2,
)
runs_per_setting = 1
default_config_key = "sac_pixels_clothv8"
experiment_title = "sac_dm_control_pixels_clothv8"
variant_levels = list()
# First variant level: a single (domain, task) pair.
domain = ['cloth_v8']
task = ['easy']
values = list(zip(domain, task))
dir_names = ["env_{}_{}".format(*v) for v in values]
keys = [('env', 'domain'), ('env', 'task')]
variant_levels.append(VariantLevel(keys, values, dir_names))
# Second variant level: sweep the task's mode kwarg.
modes = ['corners', 'border', '3x3', '9x9']
values = list(zip(modes))
dir_names = ['mode_{}'.format(*v) for v in values]
keys = [('env', 'task_kwargs', 'mode')]
variant_levels.append(VariantLevel(keys, values, dir_names))
#distance_weight = [0.0, 2.0]
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_cpu.py" affinity_code = encode_affinity( n_cpu_core=24, n_gpu=8, async_sample=True, sample_gpu_per_run=0, gpu_per_run=1, hyperthread_offset=24, # n_socket=2, ) runs_per_setting = 2 experiment_title = "atari_dqn_async_cpu" variant_levels = list() games = ["pong", "seaquest", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "cpu" run_experiments( script=script, affinity_code=affinity_code,
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Launcher for TD3 on MuJoCo v3 tasks (serial-sampler train script).
default_config_key = "td3_1M_serial"
script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_td3_serial.py"
experiment_title = "td3_mujoco_v3"
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=1,
    cpu_per_run=1,
    contexts_per_gpu=1,
)
runs_per_setting = 2
# Two separate variant stacks: 1M-step runs and 3M-step runs. Note that
# `n_steps`/`values`/`dir_names`/`keys` are deliberately reused between
# the two appends below.
variant_levels_1M = list()
variant_levels_3M = list()
n_steps = [3e6]
values = list(zip(n_steps))
dir_names = ["3M"]
keys = [("runner", "n_steps")]
variant_levels_3M.append(VariantLevel(keys, values, dir_names))
n_steps = [1e6]
values = list(zip(n_steps))
dir_names = ["1M"]
keys = [("runner", "n_steps")]
variant_levels_1M.append(VariantLevel(keys, values, dir_names))
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel affinity_code = encode_affinity( n_cpu_core=16, n_gpu=8, contexts_per_gpu=2, hyperthread_offset=24, n_socket=2, ) runs_per_setting = 3 variant_levels = list() env_ids = [ "Hopper-v3", "HalfCheetah-v3", "Walker2d-v3", "Ant-v3", "Humanoid-v3" ] values = list(zip(env_ids)) dir_names = ["env_{}".format(*v) for v in values] keys = [("env", "id")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "ddpg_from_td3_1M_serial" script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_serial.py" experiment_title = "ddpg_mujoco" run_experiments( script=script, affinity_code=affinity_code,
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel
import ul_gen
import os

# CPU-only launcher for discrete SAC on CartPole.
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=0,
    # hyperthread_offset=20,
    # NOTE(review): contexts_per_gpu=0 is unusual (other launchers use
    # >= 1); presumably irrelevant with n_gpu=0 — confirm against
    # encode_affinity's handling.
    contexts_per_gpu=0,
    n_socket=1
    # cpu_per_run=2,
)
runs_per_setting = 3
variant_levels = list()
# Single variant level: the environment id.
tasks = ['gym:CartPole-v1']
values = list(zip(tasks))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "id")]
variant_levels.append(VariantLevel(keys, values, dir_names))
variants, log_dirs = make_variants(*variant_levels)
print("Variants", variants)
print("Log_dirs", log_dirs)
script = "launch/train_discrete_sac.py"
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import make_variants, VariantLevel script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py" affinity_code = encode_affinity( n_cpu_core=4, n_gpu=1, hyperthread_offset=8, n_socket=1, # cpu_per_run=2, ) runs_per_setting = 2 experiment_title = "atari_dqn_basic" variant_levels = list() games = ["pong", "seaquest", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "dqn" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting,
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Launcher for state-based autoregressive SAC on dm_control cloth_v7.
script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac_autoreg.py"
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    contexts_per_gpu=2,
)
runs_per_setting = 2
default_config_key = "sac_state_clothv7"
experiment_title = "sac_dm_control_state_clothv7"
variant_levels = list()
# First variant level: the cloth task's mode kwarg.
modes = ['corners', 'border', 'inner_border', '3x3', '5x5', '9x9']
values = list(zip(modes))
dir_names = ['mode_{}'.format(*v) for v in values]
keys = [('env', 'task_kwargs', 'mode')]
variant_levels.append(VariantLevel(keys, values, dir_names))
# Second variant level: the task's distance_weight kwarg.
distance_weight = [0.0, 2.0]
values = list(zip(distance_weight))
dir_names = ['distance_weight_{}'.format(*v) for v in values]
keys = [('env', 'task_kwargs', 'distance_weight')]
variant_levels.append(VariantLevel(keys, values, dir_names))
variants, log_dirs = make_variants(*variant_levels)
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Async-GPU DQN launcher. NOTE(review): the script path says "football"
# while the experiment title says "atari" — confirm which is intended.
script = "rlpyt/experiments/scripts/atari/dqn/train/football_dqn_async_gpu.py"
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=8,
    async_sample=True,
    sample_gpu_per_run=2,
    gpu_per_run=2,
    # hyperthread_offset=24,
    # optim_sample_share_gpu=True,
    # n_socket=2,
)
runs_per_setting = 2
experiment_title = "atari_dqn_async_gpu"
variant_levels = list()
# First variant level: the game.
games = ["pong"]  # , "seaquest", "qbert", "chopper_command"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))
# Second variant level: prioritized replay off/on.
priorities = [False, True]
values = list(zip(priorities))
dir_names = ["pri_{}".format(*v) for v in values]
keys = [("algo", "prioritized_replay")]
variant_levels.append(VariantLevel(keys, values, dir_names))
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Launcher for pixel-based SAC on dm_control cloth_point.
script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py"
affinity_code = encode_affinity(
    n_cpu_core=20,
    n_gpu=1,
    contexts_per_gpu=1,
)
runs_per_setting = 4
default_config_key = "sac_pixels_cloth_corner"
experiment_title = "pixels_cloth_point_rolled_back_lower_starting"
variant_levels = list()
# First variant level: a single (domain, task) pair.
domain = ['cloth_point']
task = ['easy']
values = list(zip(domain, task))
dir_names = ["env_{}_{}".format(*v) for v in values]
keys = [('env', 'domain'), ('env', 'task')]
variant_levels.append(VariantLevel(keys, values, dir_names))
# Second variant level: model class / module selection. Alternative
# options are kept commented out alongside each list.
model_cls = ['PiConvModel']  # , 'GumbelPiConvModel']
random_location = [True]  # , False]
sac_module = ['sac_v2']  # , 'sac_v2_generic']
sac_agent_module = ['sac_agent_v2']  # , 'sac_agent_v2_generic']
state_keys = [['location', 'pixels']]  # , ['pixels']]
values = list(
    zip(model_cls, random_location, sac_module, sac_agent_module, state_keys))
dir_names = ["model_cls_{}_rnd_loc_{}".format(*v) for v in values]
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import VariantLevel, make_variants script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py" affinity_code = encode_affinity(n_cpu_core=16, n_gpu=4, hyperthread_offset=20, n_socket=2 # cpu_per_run=2, ) runs_per_setting = 2 experiment_title = "atari_ff_a2c_basic" variant_levels = list() games = ["pong", "seaquest", "qbert", "chopper_command"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "0" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting, variants=variants,
from rlpyt.utils.launching.affinity import encode_affinity from rlpyt.utils.launching.exp_launcher import run_experiments from rlpyt.utils.launching.variant import VariantLevel, make_variants script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" affinity_code = encode_affinity( n_cpu_core=12, n_gpu=1, hyperthread_offset=20, n_socket=1, ) runs_per_setting = 2 experiment_title = "atari_r2d1_long_4tr" variant_levels = list() games = ["gravitar"] values = list(zip(games)) dir_names = ["{}".format(*v) for v in values] keys = [("env", "game")] variant_levels.append(VariantLevel(keys, values, dir_names)) variants, log_dirs = make_variants(*variant_levels) default_config_key = "r2d1_long_4tr" run_experiments( script=script, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting, variants=variants,