Example #1
# Imports assumed from the rlpyt library; the `configs` dict is expected to
# come from the matching experiment config module (path may vary per setup).
from rlpyt.samplers.async_.serial_sampler import AsyncSerialSampler
from rlpyt.samplers.async_.collectors import DbCpuResetCollector
from rlpyt.envs.gym import gym_make
from rlpyt.algos.qpg.ddpg import DDPG
from rlpyt.agents.qpg.ddpg_agent import DdpgAgent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config
from rlpyt.utils.logging.context import logger_context
from rlpyt.experiments.configs.mujoco.qpg.mujoco_ddpg import configs


def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)  # Decode hardware assignment.
    config = configs[config_key]
    variant = load_variant(log_dir)  # Merge any variant saved by the launcher.
    config = update_config(config, variant)

    sampler = AsyncSerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["env"],
        CollectorCls=DbCpuResetCollector,
        **config["sampler"]
    )
    algo = DDPG(optim_kwargs=config["optim"], **config["algo"])
    agent = DdpgAgent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_ddpg_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
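
Launcher-style scripts like this are normally invoked by rlpyt's experiment launcher with positional command-line arguments; a minimal sketch of a standalone entry point, assuming the argument order shown in the signature:

if __name__ == "__main__":
    import sys
    # Expected args: slot_affinity_code log_dir run_ID config_key
    build_and_train(*sys.argv[1:])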
Example #2
# Imports assumed from the rlpyt library.
from rlpyt.samplers.async_.serial_sampler import AsyncSerialSampler
from rlpyt.samplers.async_.collectors import DbCpuResetCollector
from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo
from rlpyt.algos.dqn.dqn import DQN
from rlpyt.agents.dqn.atari.atari_dqn_agent import AtariDqnAgent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.launching.affinity import make_affinity
from rlpyt.utils.logging.context import logger_context


def build_and_train(game="pong", run_ID=0):
    # Change these inputs to match local machine and desired parallelism.
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=2,  # Total CPU cores to allocate to this run.
        n_gpu=1,  # Total GPUs to allocate to this run.
        sample_gpu_per_run=0,
        async_sample=True,  # Async mode uses a different affinity structure.
        # hyperthread_offset=24,  # If machine has 24 cores.
        # n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        # gpu_per_run=2,  # How many optimizer GPUs to parallelize one run.
        # cpu_per_run=1,
    )

    sampler = AsyncSerialSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=dict(game=game),
        CollectorCls=DbCpuResetCollector,
        batch_T=5,
        batch_B=4,
        max_decorrelation_steps=100,
        eval_env_kwargs=dict(game=game),
        eval_n_envs=1,
        eval_max_steps=int(10e3),
        eval_max_trajectories=2,
    )
    algo = DQN(replay_ratio=18, min_steps_learn=5e3, replay_size=int(1e5))
    agent = AtariDqnAgent()
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=2e6,
        log_interval_steps=5e3,
        affinity=affinity,
    )
    config = dict(game=game)
    name = "async_dqn_" + game
    log_dir = "async_dqn"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
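
Since this example is self-contained, it can be run directly; a minimal command-line wrapper in the style of rlpyt's bundled example scripts (the flag names here are illustrative):

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", help="Atari game", default="pong")
    parser.add_argument("--run_ID", help="run identifier (logging)", type=int, default=0)
    args = parser.parse_args()
    build_and_train(game=args.game, run_ID=args.run_ID)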
Example #3
# Imports assumed from the rlpyt library; the `configs` dict is expected to
# come from the matching Atari DQN config module (path may vary per setup).
from rlpyt.samplers.async_.serial_sampler import AsyncSerialSampler
from rlpyt.samplers.async_.collectors import DbCpuResetCollector
from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo
from rlpyt.algos.dqn.dqn import DQN
from rlpyt.agents.dqn.atari.atari_dqn_agent import AtariDqnAgent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config
from rlpyt.utils.logging.context import logger_context
from rlpyt.experiments.configs.atari.dqn.atari_dqn import configs


def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)  # Decode hardware assignment.
    config = configs[config_key]
    variant = load_variant(log_dir)  # Merge any variant saved by the launcher.
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]  # Keep eval game in sync.

    sampler = AsyncSerialSampler(EnvCls=AtariEnv,
                                 env_kwargs=config["env"],
                                 CollectorCls=DbCpuResetCollector,
                                 TrajInfoCls=AtariTrajInfo,
                                 eval_env_kwargs=config["eval_env"],
                                 **config["sampler"])
    algo = DQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = "async_serial_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
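
As with Example #1, a minimal sketch of a standalone entry point, assuming the same positional argument order used by rlpyt's launcher:

if __name__ == "__main__":
    import sys
    # Expected args: slot_affinity_code log_dir run_ID config_key
    build_and_train(*sys.argv[1:])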