def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None):
    """Launch an asynchronous R2D1 training run on the voxel TowerBuilding env.

    Hardware resources are encoded manually (rather than auto-detected) via
    ``encode_affinity``; the run logs under ``tower_building/``.
    NOTE(review): ``cuda_idx`` is accepted but never used here.
    """
    # Manually pin the experiment's hardware layout.
    code = encode_affinity(
        n_cpu_core=2,
        n_gpu=1,
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        run_slot=0,
        cpu_per_run=1,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(code)

    cfg = configs["r2d1"]
    cfg["env"]["game"] = game
    cfg["eval_env"]["game"] = cfg["env"]["game"]

    sampler = AsyncGpuSampler(
        EnvCls=voxel_make,
        env_kwargs=cfg["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=cfg["eval_env"],
        **cfg["sampler"],
    )
    algo = R2D1(optim_kwargs=cfg["optim"], **cfg["algo"])
    agent = AtariR2d1Agent(model_kwargs=cfg["model"], **cfg["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **cfg["runner"],
    )

    # Only the game name is recorded in the logged config (as in the
    # original script); the full training config is not logged.
    config = dict(game=game)
    name = "r2d1_" + game
    log_dir = "tower_building"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Launcher entry point: run async-GPU R2D1 on Atari for one slot.

    Intended to be invoked by an rlpyt launcher, which supplies the
    affinity slot code, the log directory (holding the variant file),
    the run index, and the key selecting a base config.
    """
    affinity = affinity_from_code(slot_affinity_code)

    # Start from the named base config, then apply the per-run variant
    # stored alongside the log directory.
    config = update_config(configs[config_key], load_variant(log_dir))
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"],
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )

    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train(level="nav_maze_random_goal_01", run_ID=0, cuda_idx=None):
    """Run R2D1 on a DeepMind Lab navigation level with a serial sampler.

    Builds env/eval-env configs for the given level, computes a CPU/GPU
    affinity, and trains with ``MinibatchRlEval``, logging under
    ``lab_example_2/``.
    NOTE(review): ``cuda_idx`` is accepted but never used; GPU selection
    comes from ``make_affinity`` instead.
    """
    config = configs['r2d1']
    config['eval_env'] = dict(level=level)
    config['env'] = dict(level=level)
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=4,  # Use 16 cores across all experiments.
        n_gpu=1,  # Use 8 gpus across all experiments.
        hyperthread_offset=6,  # If machine has 24 cores.
        n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        gpu_per_run=1,  # How many GPUs to parallelize one run across.
    )
    sampler = SerialSampler(
        EnvCls=DeepmindLabEnv,
        env_kwargs=config['env'],
        # Fix: evaluate with the eval config, not the training env config.
        # (Both currently hold dict(level=level), so behavior is unchanged,
        # but this keeps the two from silently diverging later.)
        eval_env_kwargs=config['eval_env'],
        batch_T=16,  # Time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = "lab_dqn_" + level
    log_dir = "lab_example_2"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()