Example No. 1
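This example configures a single-machine asynchronous R2D1 run with rlpyt: it encodes a CPU/GPU affinity, builds an AsyncGpuSampler over a custom voxel environment, and trains with the AsyncRlEval runner. The import block added below uses the standard rlpyt module paths; voxel_make is project-specific and is only assumed to exist.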
# Standard rlpyt import paths; `voxel_make` (the environment constructor) is
# project-specific and assumed to be importable from the surrounding codebase.
from rlpyt.utils.launching.affinity import encode_affinity, affinity_from_code
from rlpyt.samplers.async_.gpu_sampler import AsyncGpuSampler
from rlpyt.samplers.async_.collectors import DbGpuResetCollector
from rlpyt.envs.atari.atari_env import AtariTrajInfo
from rlpyt.algos.dqn.r2d1 import R2D1
from rlpyt.agents.dqn.atari.atari_r2d1_agent import AtariR2d1Agent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.logging.context import logger_context
from rlpyt.experiments.configs.atari.dqn.atari_r2d1 import configs  # assumed source of `configs`


def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None):
    # Manually set the resources for the experiment:
    affinity_code = encode_affinity(
        n_cpu_core=2,
        n_gpu=1,
        async_sample=True,  # Required by the async sampler/runner used below.
        # hyperthread_offset=8,  # if auto-detect fails: index where hyperthreads start (= physical core count)
        # n_socket=1,  # if auto-detect fails, force the number of CPU sockets
        run_slot=0,
        cpu_per_run=1,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(affinity_code)
    config = configs["r2d1"]
    config["env"]["game"] = game
    config["eval_env"]["game"] = config["env"]["game"]

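    # Async mode: environment sampling runs in separate processes from the
    # optimizer; the "Db" collector writes into a double-buffered sample array
    # so the sampler and optimizer never block on the same batch.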
    sampler = AsyncGpuSampler(EnvCls=voxel_make,
                              env_kwargs=config["env"],
                              CollectorCls=DbGpuResetCollector,
                              TrajInfoCls=AtariTrajInfo,
                              eval_env_kwargs=config["eval_env"],
                              **config["sampler"])
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    config = dict(game=game)  # Log only the game name as this run's config (overwrites the full dict).
    name = "r2d1_" + game
    log_dir = "tower_building"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
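For completeness, a minimal command-line entry point for this function could look like the sketch below; the flag names are illustrative and not part of the original excerpt.

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", default="TowerBuilding")
    parser.add_argument("--run_ID", type=int, default=0)
    parser.add_argument("--cuda_idx", type=int, default=None)
    args = parser.parse_args()
    build_and_train(game=args.game, run_ID=args.run_ID, cuda_idx=args.cuda_idx)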
Example No. 2
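This variant is written to be driven by rlpyt's experiment launcher: it receives a pre-encoded affinity slot, a log directory, and a config key, overlays the variant saved in the log directory onto the base config, and then builds the same async R2D1 stack on AtariEnv.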
from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config
from rlpyt.samplers.async_.gpu_sampler import AsyncGpuSampler
from rlpyt.samplers.async_.collectors import DbGpuResetCollector
from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo
from rlpyt.algos.dqn.r2d1 import R2D1
from rlpyt.agents.dqn.atari.atari_r2d1_agent import AtariR2d1Agent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.logging.context import logger_context
from rlpyt.experiments.configs.atari.dqn.atari_r2d1 import configs  # assumed source of `configs`


def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)  # Variant written into log_dir by the launcher.
    config = update_config(config, variant)  # Overlay the variant onto the base config.
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
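For context, a launch script that drives this signature typically encodes the affinity, builds variants, and calls rlpyt's run_experiments. The sketch below assumes the standard rlpyt launcher API (encode_affinity, make_variants, VariantLevel, run_experiments) and an illustrative script path; the game values are examples only.

from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Illustrative path; point this at the file containing build_and_train above.
script = "train/atari_r2d1_async_gpu.py"
affinity_code = encode_affinity(n_cpu_core=8, n_gpu=2, async_sample=True)
variant_levels = [VariantLevel(
    keys=[("env", "game")],
    values=[("pong",), ("breakout",)],
    dir_names=["pong", "breakout"],
)]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title="atari_r2d1_async",
    runs_per_setting=1,
    variants=variants,
    log_dirs=log_dirs,
    common_args=("r2d1",),  # Forwarded to the script as config_key.
)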
Example No. 3
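The last example targets DeepMind Lab: it builds the affinity with make_affinity, then (for debugging) swaps the commented-out GpuSampler for a single-process SerialSampler while keeping the R2D1 algo, the Atari R2D1 agent reused on Lab observations, and a synchronous MinibatchRlEval runner.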
from rlpyt.utils.launching.affinity import make_affinity
from rlpyt.samplers.serial.sampler import SerialSampler
from rlpyt.algos.dqn.r2d1 import R2D1
from rlpyt.agents.dqn.atari.atari_r2d1_agent import AtariR2d1Agent
from rlpyt.runners.minibatch_rl import MinibatchRlEval
from rlpyt.utils.logging.context import logger_context

# DeepmindLabEnv and the `configs` dict are project-specific here (a DeepMind
# Lab wrapper and its R2D1 config) and assumed importable from the codebase.


def build_and_train(level="nav_maze_random_goal_01", run_ID=0, cuda_idx=None):
    config = configs['r2d1']
    config['eval_env'] = dict(level=level)
    config['env'] = dict(level=level)

    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=4,  # Total CPU cores to use across the experiment.
        n_gpu=1,  # Total GPUs to use across the experiment.
        hyperthread_offset=6,  # Index where hyperthreads begin (= physical core count).
        n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        gpu_per_run=1,  # How many GPUs to parallelize one run across.
    )

    # sampler = GpuSampler(
    #     EnvCls=DeepmindLabEnv,
    #     env_kwargs=config['env'],
    #     eval_env_kwargs=config['eval_env'],
    #     CollectorCls=GpuWaitResetCollector,
    #     TrajInfoCls=LabTrajInfo,
    #     **config["sampler"]
    # )
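    # SerialSampler keeps sampling in a single process; the easiest
    # configuration to debug before scaling back up to the GpuSampler above.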
    sampler = SerialSampler(
        EnvCls=DeepmindLabEnv,
        env_kwargs=config['env'],
        eval_env_kwargs=config['eval_env'],
        batch_T=16,  # 16 time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "lab_dqn_" + level
    log_dir = "lab_example_2"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
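Note that even with the SerialSampler, the make_affinity call is not wasted: MinibatchRlEval reads cuda_idx from the affinity to place the agent's model on the GPU, while the parallel-sampling fields simply go unused until the GpuSampler is restored.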