Example #1
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)  # decode the CPU/GPU assignment chosen by the launcher
    config = configs[config_key]
    variant = load_variant(log_dir)  # variant saved into log_dir by the launcher
    config = update_config(config, variant)  # overlay the variant onto the base config
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
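
A hedged sketch of the imports and entry point that would make the snippet above a runnable script; the rlpyt module paths are standard, but the source of the `configs` dict is an assumption and depends on the experiment definitions in use.

# Hypothetical completion for the snippet above; the `configs` import is assumed.
import sys
from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config
from rlpyt.samplers.async_.gpu_sampler import AsyncGpuSampler
from rlpyt.samplers.async_.collectors import DbGpuResetCollector
from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo
from rlpyt.algos.dqn.r2d1 import R2D1
from rlpyt.agents.dqn.atari.atari_r2d1_agent import AtariR2d1Agent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.logging.context import logger_context
from rlpyt.experiments.configs.atari.dqn.atari_r2d1 import configs  # assumed config source

if __name__ == "__main__":
    # Invoked by rlpyt's run_experiments launcher, which passes
    # slot_affinity_code, log_dir, run_ID, and config_key on the command line.
    build_and_train(*sys.argv[1:])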
Example #2
def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None):
    # Manually set the hardware resources for this experiment:
    affinity_code = encode_affinity(
        n_cpu_core=2,
        n_gpu=1,
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        run_slot=0,
        cpu_per_run=1,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(affinity_code)
    config = configs["r2d1"]
    config["env"]["game"] = game
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(EnvCls=voxel_make,
                              env_kwargs=config["env"],
                              CollectorCls=DbGpuResetCollector,
                              TrajInfoCls=AtariTrajInfo,
                              eval_env_kwargs=config["eval_env"],
                              **config["sampler"])
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    config = dict(game=game)  # log only the varied setting; the full config dict is replaced for logging
    name = "r2d1_" + game
    log_dir = "tower_building"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
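
A hypothetical entry point for the snippet above, following the argparse pattern used in rlpyt's bundled example scripts; the imports (including the project-specific voxel_make and the r2d1 `configs` dict) are omitted here and assumed to mirror Example #1.

# Hypothetical entry point; argument names mirror the function signature above.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", default="TowerBuilding")
    parser.add_argument("--run_ID", type=int, default=0)
    parser.add_argument("--cuda_idx", type=int, default=None)
    args = parser.parse_args()
    build_and_train(game=args.game, run_ID=args.run_ID, cuda_idx=args.cuda_idx)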
Example #3
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AsyncSerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["env"],
        CollectorCls=DbCpuResetCollector,
        **config["sampler"]
    )
    algo = DDPG(optim_kwargs=config["optim"], **config["algo"])
    agent = DdpgAgent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_ddpg_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
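
As with Example #1, a hedged sketch of the imports and entry point this launcher-style script would need; the rlpyt module paths are standard, while the `configs` dict would come from the project's experiment definitions, which are not shown in the snippet.

# Hypothetical completion for the snippet above.
import sys
from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config
from rlpyt.samplers.async_.serial_sampler import AsyncSerialSampler
from rlpyt.samplers.async_.collectors import DbCpuResetCollector
from rlpyt.envs.gym import make as gym_make
from rlpyt.algos.qpg.ddpg import DDPG
from rlpyt.agents.qpg.ddpg_agent import DdpgAgent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.logging.context import logger_context
# plus a `configs` dict from the project's experiment definitions (not shown).

if __name__ == "__main__":
    build_and_train(*sys.argv[1:])  # slot_affinity_code, log_dir, run_ID, config_key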
Example #4
def build_and_train(game="pong", run_ID=0):
    # Change these inputs to match local machine and desired parallelism.
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=8,  # Total CPU cores to use across all experiments.
        n_gpu=2,  # Total GPUs to use across all experiments.
        gpu_per_run=1,
        sample_gpu_per_run=1,
        async_sample=True,
        optim_sample_share_gpu=False,
        # hyperthread_offset=24,  # If machine has 24 cores.
        # n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        # gpu_per_run=2,  # How many GPUs to parallelize one run across.
        # cpu_per_run=1,
    )

    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=dict(game=game),
        batch_T=5,
        batch_B=36,
        max_decorrelation_steps=100,
        eval_env_kwargs=dict(game=game),
        eval_n_envs=2,
        eval_max_steps=int(10e3),
        eval_max_trajectories=4,
    )
    algo = DQN(
        replay_ratio=8,
        min_steps_learn=1e4,
        replay_size=int(1e5)
    )
    agent = AtariDqnAgent()
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=2e6,
        log_interval_steps=1e4,
        affinity=affinity,
    )
    config = dict(game=game)
    name = "async_dqn_" + game
    log_dir = "async_dqn"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
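
For the async DQN snippet above, a likely import block and entry point; module paths follow rlpyt's standard layout, and the argparse block mirrors rlpyt's example scripts.

# Hypothetical completion for the snippet above.
from rlpyt.utils.launching.affinity import make_affinity
from rlpyt.samplers.async_.gpu_sampler import AsyncGpuSampler
from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo
from rlpyt.algos.dqn.dqn import DQN
from rlpyt.agents.dqn.atari.atari_dqn_agent import AtariDqnAgent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.logging.context import logger_context

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", default="pong")
    parser.add_argument("--run_ID", type=int, default=0)
    args = parser.parse_args()
    build_and_train(game=args.game, run_ID=args.run_ID)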
def build_and_train(game="aaai_multi", run_ID=0):
    # Change these inputs to match local machine and desired parallelism.
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=8,  # Total CPU cores to use across all experiments.
        n_gpu=1,  # Total GPUs to use across all experiments.
        sample_gpu_per_run=1,
        async_sample=True,
        optim_sample_share_gpu=True
        # hyperthread_offset=24,  # If machine has 24 cores.
        # n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        # gpu_per_run=2,  # How many GPUs to parallelize one run across.
        # cpu_per_run=1,
    )

    train_conf = PytConfig([
        Path(JSONS_FOLDER, 'configs', '2v2', 'all_equal.json'),
        Path(JSONS_FOLDER, 'configs', '2v2', 'more_horizontally.json'),
        Path(JSONS_FOLDER, 'configs', '2v2', 'more_vertically.json'),
        Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_west.json'),
        Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_east.json'),
        Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_north.json'),
        Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_south.json'),
    ])

    eval_conf = PytConfig({
        'all_equal': Path(JSONS_FOLDER, 'configs', '2v2', 'all_equal.json'),
        'more_horizontally': Path(JSONS_FOLDER, 'configs', '2v2', 'more_horizontally.json'),
        'more_vertically': Path(JSONS_FOLDER, 'configs', '2v2', 'more_vertically.json'),
        'more_south': Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_south.json'),
        'more_east': Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_east.json')
    })

    sampler = AsyncGpuSampler(
        EnvCls=Rlpyt_env,
        TrajInfoCls=AaaiTrajInfo,
        env_kwargs={
            'pyt_conf': train_conf,
            'max_steps': 3000
        },
        batch_T=8,
        batch_B=8,
        max_decorrelation_steps=100,
        eval_env_kwargs={
            'pyt_conf': eval_conf,
            'max_steps': 3000
        },
        eval_max_steps=24100,
        eval_n_envs=2,
    )
    algo = DQN(
        replay_ratio=1024,
        double_dqn=True,
        prioritized_replay=True,
        min_steps_learn=5000,
        learning_rate=0.0001,
        target_update_tau=1.0,
        target_update_interval=1000,
        eps_steps=5e4,
        batch_size=512,
        pri_alpha=0.6,
        pri_beta_init=0.4,
        pri_beta_final=1.,
        pri_beta_steps=int(7e4),
        replay_size=int(1e6),
        clip_grad_norm=1.0,
        updates_per_sync=6
    )
    agent = DqnAgent(ModelCls=Frap)
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        log_interval_steps=1000,
        affinity=affinity,
        n_steps=6e5
    )

    config = dict(game=game)
    name = "frap_" + game
    log_dir = Path(PROJECT_ROOT, "saved", "rlpyt", "multi", "frap")

    # Remove leftover files from any previous run with this run_ID before logging starts.
    save_path = Path(log_dir, 'run_{}'.format(run_ID))
    for f in save_path.glob('**/*'):
        print(f)
        f.unlink()

    with logger_context(str(log_dir), run_ID, name, config,
                        snapshot_mode='last', use_summary_writer=True, override_prefix=True):
        runner.train()
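
Finally, a hedged sketch of the imports and entry point this last snippet would need; the rlpyt paths are standard, while PytConfig, Rlpyt_env, AaaiTrajInfo, Frap, JSONS_FOLDER, and PROJECT_ROOT are project-specific names assumed to be defined elsewhere in the codebase.

# Hypothetical completion; project-specific names are assumptions, not part of rlpyt.
from pathlib import Path

from rlpyt.utils.launching.affinity import make_affinity
from rlpyt.samplers.async_.gpu_sampler import AsyncGpuSampler
from rlpyt.algos.dqn.dqn import DQN
from rlpyt.agents.dqn.dqn_agent import DqnAgent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.logging.context import logger_context
# PytConfig, Rlpyt_env, AaaiTrajInfo, Frap, JSONS_FOLDER, PROJECT_ROOT: project-specific.

if __name__ == "__main__":
    build_and_train()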