def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    print('Config', config)
    if 'pixel_wrapper_kwargs' in config['env']:
        info_keys = config.get('info_keys', None)
        state_keys = config.get('state_keys', None)
        init_namedtuples(info_keys=info_keys, state_keys=state_keys)
    sampler = CpuSampler(
        EnvCls=DMControlEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_{}_{}".format(config['env']['domain'], config['env']['task'])
    with logger_context(log_dir, run_ID, name, log_params=config,
            snapshot_mode='last'):
        runner.train()
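# NOTE (illustrative sketch, not from the source): several functions in this
# collection look up `configs[config_key]` from a module-level dict of nested
# config dicts. A minimal sketch of the expected shape; the entry name and all
# values below are assumptions, chosen only to match how the function above
# unpacks the config into its sampler/algo/agent/runner constructors.
configs = dict(
    sac_state=dict(  # Hypothetical config_key.
        env=dict(domain="cheetah", task="run"),
        optim=dict(),  # Forwarded as SAC(optim_kwargs=...).
        algo=dict(batch_size=256),  # Unpacked into SAC(**config["algo"]).
        agent=dict(),
        sampler=dict(batch_T=1, batch_B=1, eval_n_envs=4,
            eval_max_steps=int(1e4), eval_max_trajectories=4),
        runner=dict(n_steps=int(1e6), log_interval_steps=int(1e4)),
    ),
)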
def build_and_train(game="pong", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=AtariEnv, env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T= 4, # Four time-steps per sampler iteration. 在collector中采样数据的时候每个循环走多少个step batch_B=1, # 有多少个并行的environment实例 max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() # 在sampler中initialize runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, # 总共多少个step log_interval_steps=1e3, # 每多少个step记录一次日志 affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dqn_" + game log_dir = "example_1" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(env_id="Hopper-v3", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=gym_make, env_kwargs=dict(id=env_id), eval_env_kwargs=dict(id=env_id), batch_T=1, # One time-step per sampler iteration. batch_B=1, # One environment (i.e. sampler Batch dimension). max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(51e3), eval_max_trajectories=50, ) algo = SAC() # Run with defaults. agent = SacAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=1e6, log_interval_steps=1e4, affinity=dict(cuda_idx=cuda_idx), ) config = dict(env_id=env_id) name = "sac_" + env_id log_dir = "example_2" with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(game="pong", run_ID=0, cuda_idx=None, n_parallel=2): config = dict( env=dict(game=game), algo=dict(batch_size=128), sampler=dict(batch_T=2, batch_B=32), ) sampler = GpuSampler( EnvCls=AtariEnv, env_kwargs=dict(game=game), CollectorCls=GpuWaitResetCollector, eval_env_kwargs=dict(game=game), max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, # batch_T=4, # Get from config. # batch_B=1, **config[ "sampler"] # More parallel environments for batched forward-pass. ) algo = DQN(**config["algo"]) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel))), ) name = "dqn_" + game log_dir = "example_5" with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID):
    # (Or load from a central store of configs.)
    config = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )
    affinity = get_affinity(slot_affinity_code)
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        batch_T=5,
        # batch_B=16,  # Get from config.
        max_decorrelation_steps=400,
        **config["sampler"]
    )
    algo = A2C(**config["algo"])  # Run with defaults.
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    name = "a2c_" + config["env"]["game"]
    log_dir = "example_6"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
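# Functions taking (slot_affinity_code, log_dir, run_ID) like the one above are
# normally invoked by a separate launch script rather than by hand. A minimal
# sketch using rlpyt's launching utilities; the script path, experiment title,
# resource counts, and swept values below are all hypothetical.
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import VariantLevel, make_variants

affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=1,
    cpu_per_run=2,
)
# One variant level sweeping the learning rate; keys index into the config dict,
# matching how load_variant()/update_config() merge it back in above.
variant_levels = [VariantLevel(
    keys=[("algo", "learning_rate")],
    values=[(7e-4,), (1e-4,)],
    dir_names=["lr_7e-4", "lr_1e-4"],
)]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script="example_6.py",  # Hypothetical path to the script defining build_and_train().
    affinity_code=affinity_code,
    experiment_title="a2c_pong_sweep",
    runs_per_setting=1,
    variants=variants,
    log_dirs=log_dirs,
)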
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="dmlab_pc", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) algo = PixelControl( optim_kwargs=config["optim"], EncoderCls=DmlabAtcEncoderModel, encoder_kwargs=config["encoder"], pixel_control_model_kwargs=config["pixel_control_model"], **config["algo"] ) runner = UnsupervisedLearning( algo=algo, affinity=affinity, **config["runner"] ) name = config["name"] with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(game="pong", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=AtariEnv, TrajInfoCls=AtariTrajInfo, # default traj info + GameScore env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dqn_" + game #log_dir = "example_1" log_dir = get_outputs_path() with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="atari_ats", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) algo = AugmentedTemporalSimilarity( optim_kwargs=config["optim"], encoder_kwargs=config["encoder"], **config["algo"] ) runner = UnsupervisedLearning( algo=algo, affinity=affinity, **config["runner"] ) name = config["name"] with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(run_id=0, greedy_eval=False):
    sampler = SerialSampler(
        EnvCls=MyEnv,
        env_kwargs=dict(),
        eval_env_kwargs=dict(),
        batch_T=horizon,  # `horizon`: episode length, assumed defined at module scope.
        batch_B=64,
        max_decorrelation_steps=0,
        eval_n_envs=64,
        eval_max_steps=int(1e6),
        eval_max_trajectories=64,
    )
    runner = MinibatchRl(
        algo=PPO(entropy_loss_coeff=0., learning_rate=3e-4),
        agent=MyAgent(greedy_eval),
        sampler=sampler,
        n_steps=int(400 * horizon * 64),
        log_interval_steps=int(10 * horizon * 64),
    )
    log_params = dict()
    log_dir = "data/rl_example_2/{}".format(
        datetime.datetime.today().strftime("%Y%m%d_%H%M"))
    with logger_context(log_dir, run_id, 'Reacher2D', log_params=log_params,
            snapshot_mode="last", use_summary_writer=True,
            override_prefix=True):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = CategoricalDQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariCatDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    print('Variant', variant)
    config = update_config(config, variant)
    sampler = SerialSampler(
        EnvCls=DMControlEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_{}_{}".format(config['env']['domain'], config['env']['task'])
    with logger_context(log_dir, run_ID, name, log_params=config,
            snapshot_mode='last'):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = AsyncSerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["env"],
        CollectorCls=DbCpuResetCollector,
        **config["sampler"]
    )
    algo = DDPG(optim_kwargs=config["optim"], **config["algo"])
    agent = DdpgAgent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_ddpg_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train(cfg, game="ftwc", run_ID=0): #GVS NOTE: for ftwc/qait ?use CpuWaitResetCollector (or CpuResetCollector) sampler = SerialSampler( EnvCls=AtariEnv, TrajInfoCls=AtariTrajInfo, # default traj info + GameScore env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e2), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e2) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cfg.cuda_idx), ) config = dict(game=game) name = "dqn_" + game log_dir = "ftwc" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    eval_env_config = config["env"].copy()
    eval_env_config["start_level"] = config["env"]["num_levels"] + 100
    eval_env_config["num_levels"] = 100
    sampler = GpuSampler(
        EnvCls=make,
        env_kwargs=config["env"],
        CollectorCls=GpuResetCollector,
        eval_env_kwargs=eval_env_config,
        **config["sampler"]
    )
    if config["checkpoint"]:
        model_state_dict = torch.load(config["checkpoint"])
        print("Loaded.")
    else:
        model_state_dict = None
    algo = PPO_AUG_VAE(optim_kwargs=config["optim"], **config["algo"])
    agent = RADPgVaeAgent(
        ModelCls=RadVaePolicy,
        model_kwargs=config["model"],
        initial_model_state_dict=model_state_dict,
        **config["agent"]
    )
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'):
        runner.train()
def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None): # Either manually set the resources for the experiment: affinity_code = encode_affinity( n_cpu_core=2, n_gpu=1, # hyperthread_offset=8, # if auto-detect doesn't work, number of CPU cores # n_socket=1, # if auto-detect doesn't work, can force (or force to 1) run_slot=0, cpu_per_run=1, set_affinity=True, # it can help to restrict workers to individual CPUs ) affinity = affinity_from_code(affinity_code) config = configs["r2d1"] config["env"]["game"] = game config["eval_env"]["game"] = config["env"]["game"] sampler = AsyncGpuSampler(EnvCls=voxel_make, env_kwargs=config["env"], CollectorCls=DbGpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["eval_env"], **config["sampler"]) algo = R2D1(optim_kwargs=config["optim"], **config["algo"]) agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"]) runner = AsyncRlEval(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) config = dict(game=game) name = "r2d1_" + game log_dir = "tower_building" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=os.cpu_count(),  # Use all available CPU cores.
        n_gpu=1,  # Use one GPU.
        gpu_per_run=1,
        sample_gpu_per_run=1,
        async_sample=True,
        optim_sample_share_gpu=True,
    )
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = CategoricalDQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariCatDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train( slot_affinity_code="0slt_0gpu_1cpu_1cpr", log_dir="test", run_ID="0", config_key="LSTM", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = CpuSampler( EnvCls=safety_gym_make, env_kwargs=config["env"], TrajInfoCls=SafetyGymTrajInfo, **config["sampler"] ) algo = CppoPID(**config["algo"]) agent = CppoLstmAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl( algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"], ) name = "cppo_" + config["env"]["id"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(game="pong", run_ID=0, cuda_idx=None, mid_batch_reset=False, n_parallel=2): affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel))) Collector = GpuResetCollector if mid_batch_reset else GpuWaitResetCollector print(f"To satisfy mid_batch_reset=={mid_batch_reset}, using {Collector}.") sampler = GpuParallelSampler( EnvCls=AtariEnv, env_kwargs=dict(game=game, num_img_obs=1), # Learn on individual frames. CollectorCls=Collector, batch_T=20, # Longer sampling/optimization horizon for recurrence. batch_B=16, # 16 parallel environments. max_decorrelation_steps=400, ) algo = A2C() # Run with defaults. agent = AtariLstmAgent() runner = MinibatchRl( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e5, affinity=affinity, ) config = dict(game=game) name = "a2c_" + game log_dir = "example_4" with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = get_affinity(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train( slot_affinity_code="0slt_1gpu_1cpu", log_dir="test", run_ID="0", config_key="ppo_ul_16env", ): affinity = affinity_from_code(slot_affinity_code) config = configs[config_key] variant = load_variant(log_dir) config = update_config(config, variant) pprint.pprint(config) sampler = SerialSampler( EnvCls=AtariEnv84, env_kwargs=config["env"], CollectorCls=CpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["env"], # Same args! **config["sampler"]) algo = PpoUl(optim_kwargs=config["optim"], **config["algo"]) agent = AtariPgAgent(model_kwargs=config["model"], **config["agent"]) runner = MinibatchRl(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) name = config["env"]["game"] with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["algo_name"] = 'A2OC'
    t_env = pomdp_interface(**config["env"])  # Temporary env instance, only to read the discount.
    config["algo"]["discount"] = t_env.discount
    sampler = GpuSampler(
        EnvCls=pomdp_interface,
        env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = A2OC(optim_kwargs=config["optim"], **config["algo"])
    agent = PomdpOcFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def test_rlpyt_simple():
    """Partially copied from example 1."""
    game = "pong"
    run_ID = 0
    cuda_idx = None
    n_steps = 1
    sampler = SerialSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # Default traj info + GameScore.
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=4,  # Four time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e3, replay_size=int(1e3))  # Small replay to avoid memory issues.
    agent = AtariDqnAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=n_steps,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dqn_" + game
    log_dir = "test_example_1"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = GpuSampler(
        EnvCls=gym.make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DiscreteSACAE(
        optim_kwargs=config["optim"],
        ae_optim_kwargs=config["ae_optim"],
        **config["algo"]
    )
    agent = DiscreteSacAEAgent(
        **config["agent"],
        encoder_kwargs=config["encoder"],
        model_kwargs=config["actor"],
        critic_kwargs=config["critic"],
    )
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'):
        runner.train()
def build_and_train(game="academy_empty_goal_close", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=create_single_football_env, env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dqn_" + game log_dir = "example_1" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = Dreamer(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDreamerAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"] + str(config["sampler"]["batch_T"])
    with logger_context(log_dir, run_ID, name, config):  # Might have to flatten config.
        runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = CpuSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        seed=int(run_ID) * 1000,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
def build_and_train(env_id="HalfCheetah-Directional-v0", run_ID=0, cuda_idx=None, n_parallel=6): affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)), alternating=True) env_args = dict(id=env_id) env_args[RLPYT_WRAPPER_KEY] = [ClipActionsWrapper] # sampler = GpuSampler( # EnvCls=gym_make, # env_kwargs=env_args, # eval_env_kwargs=env_args, # batch_T=256, # One time-step per sampler iteration. # batch_B=8, # One environment (i.e. sampler Batch dimension). # max_decorrelation_steps=100, # eval_n_envs=5, # eval_max_steps=int(25e3), # eval_max_trajectories=30 # ) # agent = MujocoFfOcAgent(model_kwargs={'option_size': 2}) # sampler = AlternatingSampler( # EnvCls=gym_make, # env_kwargs=env_args, # eval_env_kwargs=env_args, # batch_T=256, # One time-step per sampler iteration. # batch_B=8, # One environment (i.e. sampler Batch dimension). # max_decorrelation_steps=100, # eval_n_envs=5, # eval_max_steps=int(25e3), # eval_max_trajectories=30 # ) # agent = AlternatingMujocoFfOcAgent(model_kwargs={'option_size': 2}) sampler = SerialSampler( EnvCls=gym_make, env_kwargs=env_args, eval_env_kwargs=env_args, batch_T=256, # One time-step per sampler iteration. batch_B=8, # One environment (i.e. sampler Batch dimension). max_decorrelation_steps=0, # eval_n_envs=2, # eval_max_steps=int(51e2), # eval_max_trajectories=5, ) agent = MujocoFfOcAgent(model_kwargs={'option_size': 2}) algo = PPOC(clip_vf_loss=False, normalize_rewards='return') # Run with defaults. runner = MinibatchRl( algo=algo, agent=agent, sampler=sampler, n_steps=1e6, log_interval_steps=1e3, affinity=affinity, transfer=True, transfer_iter=150, log_traj_window=10 ) config = dict(env_id=env_id) name = "ppoc_" + env_id log_dir = "example_2a_ppoc" with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(game="pong", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=AtariEnv, env_kwargs=dict(game=game), CollectorCls=ResetCollector, eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dqn_" + game log_dir = "example_1" with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(log_dir, game="pong", run_ID=0, cuda_idx=None, eval=False): sampler = SerialSampler( EnvCls=AtariEnv, TrajInfoCls=AtariTrajInfo, # default traj info + GameScore env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = Dreamer() # Run with defaults. agent = AtariDreamerAgent() runner_cls = MinibatchRlEval if eval else MinibatchRl runner = runner_cls( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dreamer_" + game with logger_context(log_dir, run_ID, name, config, snapshot_mode="last", override_prefix=True, use_summary_writer=True): runner.train()