def build_and_train(
    slot_affinity_code="0slt_1gpu_1cpu",
    log_dir="test",
    run_ID="0",
    config_key="ppo_16env",
    experiment_title="exp",
    snapshot_mode="none",
    snapshot_gap=None,
):
    """Assemble sampler, algorithm, agent and runner, then train with PPO.

    The configuration is looked up in ``configs`` by ``config_key`` and merged
    with the variant loaded from ``log_dir``. When the merged config names a
    pretrained model directory, the agent's ``state_dict_filename`` is pointed
    at a ``params.pkl`` file whose location is derived from ``log_dir``.

    Args:
        slot_affinity_code: Code string handed to ``affinity_from_code``.
        log_dir: Directory used to load the variant and passed on to
            ``logger_context``; also parsed to locate the pretrained model.
        run_ID: Run identifier forwarded to ``logger_context``.
        config_key: Key selecting the base entry in ``configs``.
        experiment_title: Substring of ``log_dir``; only the text after its
            last occurrence is used when deriving the pretrained model path.
        snapshot_mode: Forwarded unchanged to ``logger_context``.
        snapshot_gap: Forwarded to ``logger_context``; converted with
            ``int()`` first when it is not ``None``.
    """
    affinity = affinity_from_code(slot_affinity_code)
    config = update_config(configs[config_key], load_variant(log_dir))

    # HACK: relies on the first part of the log_dir matching the source of
    # the model.
    model_base_dir = config["pretrain"]["model_dir"]
    if model_base_dir is not None:
        # Get rid of the leading ~/GitRepos/adam/rlpyt/data/local/<timestamp>/
        # portion, i.e. everything up to and including the experiment title.
        after_title = log_dir.split(experiment_title)[-1]
        raw_log_dir = after_title.lstrip("/")
        # Keep the UL part, which comes first.
        model_sub_dir = raw_log_dir.split("/RlFromUl/")[0]
        params_path = osp.join(model_base_dir, model_sub_dir, "run_0/params.pkl")
        config["agent"]["state_dict_filename"] = params_path

    pprint.pprint(config)

    sampler = AlternatingSampler(
        EnvCls=DmlabEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        **config["sampler"],
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = DmlabPgLstmAlternatingAgent(
        model_kwargs=config["model"],
        **config["agent"],
    )
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )

    level_name = config["env"]["level"]
    # The gap may arrive as a string (e.g. from a command line); normalise it.
    gap = None if snapshot_gap is None else int(snapshot_gap)
    with logger_context(
        log_dir,
        run_ID,
        level_name,
        config,
        snapshot_mode=snapshot_mode,
        snapshot_gap=gap,
    ):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Assemble sampler, A2C algorithm, agent and runner, then train.

    The configuration is looked up in ``configs`` by ``config_key`` and merged
    with the variant loaded from ``log_dir``.

    Args:
        slot_affinity_code: Code string handed to ``affinity_from_code``.
        log_dir: Directory used to load the variant and passed on to
            ``logger_context``.
        run_ID: Run identifier forwarded to ``logger_context``.
        config_key: Key selecting the base entry in ``configs``.
    """
    affinity = affinity_from_code(slot_affinity_code)
    config = update_config(configs[config_key], load_variant(log_dir))

    sampler = AlternatingSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        **config["sampler"],
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )

    # The environment id doubles as the name given to the logger context.
    env_id = config["env"]["id"]
    with logger_context(log_dir, run_ID, env_id, config):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    """Assemble sampler, PPO algorithm, agent and runner, then train.

    The configuration is looked up in ``configs`` by ``config_key`` and merged
    with the variant loaded from ``log_dir``.

    Args:
        slot_affinity_code: Code string handed to ``affinity_from_code``.
        log_dir: Directory used to load the variant and passed on to
            ``logger_context``.
        run_ID: Run identifier forwarded to ``logger_context``.
        config_key: Key selecting the base entry in ``configs``.
    """
    affinity = affinity_from_code(slot_affinity_code)
    config = update_config(configs[config_key], load_variant(log_dir))

    sampler = AlternatingSampler(
        EnvCls=ProcgenEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuResetCollector,
        **config["sampler"],
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = ProcgenFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )

    # The game entry of the env config is the name given to the logger context.
    game_name = config["env"]["game"]
    with logger_context(log_dir, run_ID, game_name, config):
        runner.train()
def build_and_train(
    slot_affinity_code="0slt_1gpu_1cpu",
    log_dir="test",
    run_ID="0",
    config_key="ppo_ul_16env",
    snapshot_mode="none",
    snapshot_gap=None,
):
    """Assemble sampler, PpoUl algorithm, agent and runner, then train.

    The configuration is looked up in ``configs`` by ``config_key``, merged
    with the variant loaded from ``log_dir``, and pretty-printed before any
    component is built.

    Args:
        slot_affinity_code: Code string handed to ``affinity_from_code``.
        log_dir: Directory used to load the variant and passed on to
            ``logger_context``.
        run_ID: Run identifier forwarded to ``logger_context``.
        config_key: Key selecting the base entry in ``configs``.
        snapshot_mode: Forwarded unchanged to ``logger_context``.
        snapshot_gap: Forwarded to ``logger_context``; converted with
            ``int()`` first when it is not ``None``.
    """
    affinity = affinity_from_code(slot_affinity_code)
    config = update_config(configs[config_key], load_variant(log_dir))
    pprint.pprint(config)

    sampler = AlternatingSampler(
        EnvCls=DmlabEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        **config["sampler"],
    )
    algo = PpoUl(optim_kwargs=config["optim"], **config["algo"])
    agent = DmlabPgLstmAlternatingAgent(
        model_kwargs=config["model"],
        **config["agent"],
    )
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )

    level_name = config["env"]["level"]
    # The gap may arrive as a string (e.g. from a command line); normalise it.
    gap = None if snapshot_gap is None else int(snapshot_gap)
    with logger_context(
        log_dir,
        run_ID,
        level_name,
        config,
        snapshot_mode=snapshot_mode,
        snapshot_gap=gap,
    ):
        runner.train()