Exemplo n.º 1
0
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch: training script, named default config, and log title.
script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_td3_serial.py"
default_config_key = "td3_1M_serial"
experiment_title = "td3_mujoco_v3"
runs_per_setting = 2

# Resource description encoded for the launcher.
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=1,
    cpu_per_run=1,
    contexts_per_gpu=1,
)

# Two separate stacks of variant levels, one per step budget.
variant_levels_1M = []
variant_levels_3M = []

# 3M-step budget: overrides config["runner"]["n_steps"].
n_steps = [3e6]
values = [(steps,) for steps in n_steps]
dir_names = ["3M"]
keys = [("runner", "n_steps")]
variant_levels_3M.append(VariantLevel(keys, values, dir_names))

# 1M-step budget: same config key, different value and directory name.
n_steps = [1e6]
values = [(steps,) for steps in n_steps]
dir_names = ["1M"]
keys = [("runner", "n_steps")]
variant_levels_1M.append(VariantLevel(keys, values, dir_names))
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch and how the results are titled.
script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_gpu.py"
# default_config_key = "0"
experiment_title = "lstm_4frame_test"
runs_per_setting = 1

# Resource description encoded for the launcher.
affinity_code = encode_affinity(
    n_cpu_core=6,
    n_gpu=2,
    hyperthread_offset=8,
    n_socket=1,
    # cpu_per_run=2,
)

# Level 1: explicit (learning rate, entropy coefficient) pairs; the
# learning rate is the same in all four pairs.
learning_rate = [1e-4, 1e-4, 1e-4, 1e-4]
entropy_loss_coeff = [0.01, 0.4, 0.04, 0.1]
values = [(lr, ent) for lr, ent in zip(learning_rate, entropy_loss_coeff)]
dir_names = ["test_{}lr_{}ent".format(lr, ent) for lr, ent in values]
keys = [("algo", "learning_rate"), ("algo", "entropy_loss_coeff")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Level 2: one entry per game, directory named after the game.
games = ["seaquest"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))
Exemplo n.º 3
0
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# Resource description encoded for the launcher.
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=8,
    contexts_per_gpu=2,
    hyperthread_offset=24,
    n_socket=2,
    # cpu_per_run=2,
)
runs_per_setting = 5

# Single variant level: one entry per environment id, logged under
# "env_<id>".
env_ids = ["Hopper-v3", "HalfCheetah-v3", "Walker2d-v3", "Ant-v3"]
values = [(env_id,) for env_id in env_ids]
dir_names = ["env_{}".format(env_id) for (env_id,) in values]
keys = [("env", "id")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

# default_config_key = "ppo_1M_serial"
# script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ppo_serial.py"
# experiment_title = "ppo_mujoco"

# run_experiments(
#     script=script,
#     affinity_code=affinity_code,
#     experiment_title=experiment_title,
Exemplo n.º 4
0
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py"
default_config_key = "dqn"
experiment_title = "atari_dqn_basic_cpusamp_1of2"
runs_per_setting = 2

# Resource description encoded for the launcher.
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=2,
    # cpu_per_run=2,
)

# Single variant level: one entry per game, directory named after the game.
games = ["pong", "seaquest"]  # , "qbert", "chopper_command"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu_noeval.py"
default_config_key = "no_eval"
experiment_title = "atari_dqn_no_eval"
runs_per_setting = 2

# Resource description encoded for the launcher.
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=8,
    hyperthread_offset=24,
    n_socket=2,
)

# Single variant level: one entry per game, directory named after the game.
games = ["pong", "seaquest", "qbert", "chopper_command"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
Exemplo n.º 6
0
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py"
default_config_key = "async_alt_dgx"
experiment_title = "atari_r2d1_async_alt"
runs_per_setting = 2

# Resource description encoded for the launcher (async sampling with the
# alternating flag set).
affinity_code = encode_affinity(
    n_cpu_core=40,
    n_gpu=4,
    async_sample=True,
    gpu_per_run=1,
    sample_gpu_per_run=2,
    # hyperthread_offset=24,
    # optim_sample_share_gpu=True,
    n_socket=1,  # Force this.
    alternating=True,
)

# Single variant level: one entry per game, directory named after the game.
games = ["pong", "seaquest", "qbert", "chopper_command"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
Exemplo n.º 7
0
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_cpu.py"
default_config_key = "ppo_1M"
experiment_title = "first_test_mujoco"
runs_per_setting = 1

# Resource description encoded for the launcher (no GPU requested).
affinity_code = encode_affinity(
    n_cpu_core=2,
    n_gpu=0,
    hyperthread_offset=2,
    n_socket=1,
    cpu_per_run=2,
)

# Level 1: one entry per environment id, logged under "env_<id>".
env_ids = ["Hopper-v2"]  # , "Swimmer-v3"]
values = [(env_id,) for env_id in env_ids]
dir_names = ["env_{}".format(env_id) for (env_id,) in values]
keys = [("env", "id")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Level 2: sets config["model"]["normalize_observation"]; the directory
# name is fixed rather than derived from the value.
norm_obs = [True]
values = [(flag,) for flag in norm_obs]
dir_names = ["TrueNormObs"]
keys = [("model", "normalize_observation")]
variant_levels.append(VariantLevel(keys, values, dir_names))

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch and how the results are titled.
script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_gpu.py"
experiment_title = "atari_dqn_async_gpu"
runs_per_setting = 2

# Resource description encoded for the launcher (async sampling).
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=8,
    async_sample=True,
    sample_gpu_per_run=2,
    gpu_per_run=2,
    # hyperthread_offset=24,
    # optim_sample_share_gpu=True,
    # n_socket=2,
)

# Level 1: one entry per game, directory named after the game.
games = ["pong"]  # , "seaquest", "qbert", "chopper_command"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Level 2: toggles config["algo"]["prioritized_replay"], logged under
# "pri_False" / "pri_True".
priorities = [False, True]
values = [(flag,) for flag in priorities]
dir_names = ["pri_{}".format(flag) for (flag,) in values]
keys = [("algo", "prioritized_replay")]
variant_levels.append(VariantLevel(keys, values, dir_names))
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_cpu.py"
default_config_key = "cpu"
experiment_title = "atari_dqn_async_cpu"
runs_per_setting = 2

# Resource description encoded for the launcher (async sampling, with
# sample_gpu_per_run set to 0).
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=8,
    async_sample=True,
    sample_gpu_per_run=0,
    gpu_per_run=1,
    hyperthread_offset=24,
    # n_socket=2,
)

# Single variant level: one entry per game, directory named after the game.
games = ["pong", "seaquest", "qbert", "chopper_command"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
Exemplo n.º 10
0
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py"
default_config_key = "r2d1_long_4tr"
experiment_title = "atari_r2d1_long_4tr"
runs_per_setting = 2

# Resource description encoded for the launcher.
affinity_code = encode_affinity(
    n_cpu_core=12,
    n_gpu=1,
    hyperthread_offset=20,
    n_socket=1,
)

# Single variant level: one entry per game, directory named after the game.
games = ["gravitar"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
Exemplo n.º 11
0
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py"
default_config_key = "dqn"
experiment_title = "atari_dqn_basic_cpu"
runs_per_setting = 2

# Resource description encoded for the launcher.
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=6,
    # hyperthread_offset=24,
    n_socket=2,
    # cpu_per_run=2,
)

# Single variant level: one entry per game, directory named after the game.
games = ["pong", "qbert", "chopper_command"]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
keys = [("env", "game")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_async_serial.py"
default_config_key = "async_serial"
experiment_title = "ddpg_mujoco_async"
runs_per_setting = 2

# Resource description encoded for the launcher (async sampling).
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    # contexts_per_gpu=2,
    async_sample=True,
    # hyperthread_offset=2,
    # n_socket=1,
    # cpu_per_run=1,
)

# Single variant level: one entry per environment id, logged under
# "env_<id>".
env_ids = ["Hopper-v3", "HalfCheetah-v3"]  # , "Swimmer-v3"]
values = [(env_id,) for env_id in env_ids]
dir_names = ["env_{}".format(env_id) for (env_id,) in values]
keys = [("env", "id")]
variant_levels = [VariantLevel(keys, values, dir_names)]

# Expand the levels into the variants and their log directories.
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
Exemplo n.º 13
0
def main(args):
    """Build the bandit hyper-parameter variants and pass them to the launcher.

    The sweep is picked by the local ``variant_choice`` switch, which is
    currently fixed to 3:

    * 0 -- "eGreedy" solution with three ``epsilon`` values,
    * 1 -- "ucb" solution with three ``c`` values,
    * 2 -- "thompson" solution with two ``prior`` tables,
    * 3 -- "gradientBandit" solution combined with a ``beta`` level and a
      ``b`` level.

    Each variant returned by ``make_variants`` is replaced by the result of
    ``update_config(default_config, variant)`` before ``run_experiments`` is
    called.

    Args:
        args: Object exposing a ``debug`` attribute; it is forwarded to
            ``run_experiments`` as ``debug_mode``.

    Raises:
        ValueError: If ``variant_choice`` is not one of 0, 1, 2 or 3.
    """
    # Either manually set the resources for the experiment:
    affinity_code = encode_affinity(
        n_cpu_core=16,
        n_gpu=1,
        contexts_per_gpu=16,
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        cpu_per_run=1,
    )
    # Or try an automatic one, but results may vary:
    # affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True)

    default_config = make_default_config()

    # Each level below pairs config paths (``keys``) with the rows of
    # values to try and the directory name used for each row.
    variant_levels = []
    variant_choice = 3

    if variant_choice == 0:
        # ---- epsilon-greedy: sweep epsilon ----
        values = [["eGreedy", epsilon] for epsilon in (0.1, 0.5, 0.9)]
        dir_names = ["eGreedy-e{}".format(row[1]) for row in values]
        keys = [("solution",), ("agent_kwargs", "epsilon")]
        variant_levels.append(VariantLevel(keys, values, dir_names))

    elif variant_choice == 1:
        # ---- UCB: sweep c ----
        values = [["ucb", c] for c in (1, 5, 10)]
        dir_names = ["{}-c{}".format(solution, c) for solution, c in values]
        keys = [("solution",), ("agent_kwargs", "c")]
        variant_levels.append(VariantLevel(keys, values, dir_names))

    elif variant_choice == 2:
        # ---- Thompson sampling: sweep the prior table ----
        priors = (
            [[1, 1], [1, 1], [1, 1]],
            [[601, 401], [401, 601], [2, 3]],
        )
        values = [["thompson", prior] for prior in priors]
        # The directory name only carries the first number of the prior.
        dir_names = [
            "{}-prior{}".format(solution, prior[0][0])
            for solution, prior in values
        ]
        keys = [("solution",), ("agent_kwargs", "prior")]
        variant_levels.append(VariantLevel(keys, values, dir_names))

    elif variant_choice == 3:
        # ---- gradient bandit: fixed solution, then beta, then b ----
        values = [["gradientBandit"]]
        dir_names = ["{}".format(row[0]) for row in values]
        keys = [("solution",)]
        variant_levels.append(VariantLevel(keys, values, dir_names))

        values = [[beta] for beta in (0.2, 1.0, 2.0, 5.0)]
        dir_names = ["beta{}".format(row[0]) for row in values]
        keys = [("agent_kwargs", "beta")]
        variant_levels.append(VariantLevel(keys, values, dir_names))

        values = [[b] for b in (0.0, 0.8, 5.0, 20.0)]
        dir_names = ["b{}".format(row[0]) for row in values]
        keys = [("agent_kwargs", "b")]
        variant_levels.append(VariantLevel(keys, values, dir_names))

    else:
        raise ValueError("Wrong experiment choice {}".format(variant_choice))

    # Get all variants and their own log directories, then replace every
    # variant in place with update_config(default_config, variant).
    variants, log_dirs = make_variants(*variant_levels)
    variants[:] = [
        update_config(default_config, variant) for variant in variants
    ]

    run_experiments(
        script="girl/experiments/bandit/bandit.py",
        affinity_code=affinity_code,
        experiment_title="Bandit",
        runs_per_setting=200,
        variants=variants,
        log_dirs=log_dirs,  # the directory under "${experiment title}"
        # if greater than 0, the launcher will run one variant in this process
        debug_mode=args.debug,
    )
Exemplo n.º 14
0
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch and how the results are titled.
script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_cpu.py"
# default_config_key = "0"
experiment_title = "lstm_test_gpu"
runs_per_setting = 2

# Resource description encoded for the launcher.
affinity_code = encode_affinity(  # Let it be kwargs?
    n_cpu_core=6,
    n_gpu=2,
    hyperthread_offset=8,
    n_socket=1,
    # cpu_per_run=4,
)

# Disabled alternative level (learning rate paired with entropy coefficient):
# learning_rate = [7e-4] * 4
# entropy_loss_coeff = [0.01, 0.02, 0.04, 0.08]
# values = list(zip(learning_rate, entropy_loss_coeff))
# dir_names = ["test_{}lr_{}ent".format(*v) for v in values]
# keys = [("algo", "learning_rate"), ("algo", "entropy_loss_coeff")]
# variant_levels.append(VariantLevel(keys, values, dir_names))

# Active level: explicit (learning rate, batch_T) pairs; the learning rate
# is the same in both pairs.
learning_rate = [7e-4, 7e-4]
batch_T = [5, 20]
values = [(lr, horizon) for lr, horizon in zip(learning_rate, batch_T)]
dir_names = ["test_{}lr_{}T".format(lr, horizon) for lr, horizon in values]
keys = [("algo", "learning_rate"), ("sampler", "batch_T")]
variant_levels = [VariantLevel(keys, values, dir_names)]
Exemplo n.º 15
0
with many different inputs to encode, and see what comes out.

The results will be logged with a folder structure according to the
variant levels constructed here.

"""

from exptools.launching.affinity import encode_affinity, quick_affinity_code
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# Title for the logs and number of runs per setting.
experiment_title = "example_6"
runs_per_setting = 2

# Either manually set the resources for the experiment:
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=2,
    # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
    # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
    cpu_per_run=1,
    set_affinity=True,  # it can help to restrict workers to individual CPUs
)
# Or try an automatic one, but results may vary:
# affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True)

variant_levels = []

# Within a variant level, list each combination explicitly: the i-th
# learning rate is paired with the i-th batch_B.
learning_rate = [7e-4, 1e-3]
batch_B = [16, 32]
values = [(lr, n_envs) for lr, n_envs in zip(learning_rate, batch_B)]
dir_names = ["example6_{}lr_{}B".format(lr, n_envs) for lr, n_envs in values]
from exptools.launching.affinity import encode_affinity
from exptools.launching.exp_launcher import run_experiments
from exptools.launching.variant import make_variants, VariantLevel

# What to launch, which named default config to use, and the log title.
script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ppo_serial.py"
default_config_key = "ppo_1M_serial"
experiment_title = "ppo_mujoco_v3_serial_hc_tl"
runs_per_setting = 4

# Resource description encoded for the launcher.
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=4,
    # contexts_per_gpu=1,
    # hyperthread_offset=24,
    # n_socket=2,
    # cpu_per_run=2,
)

# Disabled 1M-step stack:
# variant_levels_1M = list()
# n_steps = [1e6]
# values = list(zip(n_steps))
# dir_names = ["1M"]
# keys = [("runner", "n_steps")]
# variant_levels_1M.append(VariantLevel(keys, values, dir_names))

# Active stack: sets config["algo"]["bootstrap_timelimit"]; the directory
# name is fixed rather than derived from the value.
bootstrap_tls = [True]
values = [(flag,) for flag in bootstrap_tls]
dir_names = ["bootstrap_timelimit"]
keys = [("algo", "bootstrap_timelimit")]
variant_levels_3M = [VariantLevel(keys, values, dir_names)]