示例#1
0
from rlpyt.utils.launching.affinity import quick_affinity_code
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import VariantLevel, make_variants

# Settings for launching train_cppo.py under the title "PointGoal_Ki_Kp".
# NOTE(review): these names are presumably consumed by run_experiments /
# make_variants further down the script (not visible here) -- confirm.
experiment_title = "PointGoal_Ki_Kp"
script = "rlpyt/projects/safe/experiments/scripts/train/train_cppo.py"
default_config_key = "LSTM"
runs_per_setting = 4

# Affinity code obtained with quick_affinity_code's default arguments.
affinity_code = quick_affinity_code()

# Starts out empty.
variant_levels = []

env_ids = [
    # "Safexp-PointGoal0-v0",
    "Safexp-PointGoal1-v0",
    # "Safexp-PointGoal2-v0",
    # "Safexp-PointButton0-v0",
    # "Safexp-PointButton1-v0",
    # "Safexp-PointButton2-v0",
    # "Safexp-PointPush0-v0",
    # "Safexp-PointPush1-v0",
    # "Safexp-PointPush2-v0",
    # "Safexp-CarGoal0-v0",
    # "Safexp-CarGoal1-v0",
    # "Safexp-CarGoal2-v0",
    # "Safexp-CarButton0-v0",
    # "Safexp-CarButton1-v0",
    # "Safexp-CarButton2-v0",
    # "Safexp-CarPush0-v0",
    # "Safexp-CarPush1-v0",
from rlpyt.utils.launching.affinity import encode_affinity, quick_affinity_code
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import VariantLevel, make_variants

# Command-line interface: exactly two integer arguments, MY_COMPUTER and
# NUM_COMPUTERS.
# NOTE(review): presumably these select this machine's share of the runs
# further down the script (not visible here) -- confirm.
args = sys.argv[1:]
if len(args) != 2:
    # Explicit check instead of `assert`: assertions are removed under
    # `python -O`, which would let a wrong argument count slip through.
    raise SystemExit(
        f"usage: {sys.argv[0]} MY_COMPUTER NUM_COMPUTERS "
        f"(expected 2 arguments, got {len(args)})"
    )
# int() raises ValueError if an argument is not an integer literal.
my_computer, num_computers = int(args[0]), int(args[1])

print(f"MY_COMPUTER: {my_computer},  NUM_COMPUTERS: {num_computers}")

# Settings for launching dmlab_ppo_rl_with_ul_alt.py.
# NOTE(review): presumably consumed by run_experiments further down the
# script (not visible here) -- confirm.
experiment_title = "dmlab_ppo_with_ul_prioritized_4"
runs_per_setting = 2
script = (
    "rlpyt/ul/experiments/scripts/rl_with_ul/dmlab/train/dmlab_ppo_rl_with_ul_alt.py"
)

# Affinity code requested with one context per GPU and alternating=True.
affinity_code = quick_affinity_code(contexts_per_gpu=1, alternating=True)

# Only the first variant-level list is active; variant_levels_2 and
# variant_levels_3 are disabled.
variant_levels_1 = []

# Three parallel lists: position i of each list describes one variant.
stop_conv_grads = [False, False, True]
ul_update_schedules = ["constant_2", "constant_4", "constant_4"]
min_steps_rl = [1e5, 1e5, 1e5]

# One (stop_conv_grad, ul_update_schedule, min_steps_rl) tuple per variant.
values = list(
    zip(
        stop_conv_grads,
        ul_update_schedules,
        min_steps_rl,
    )
)

# One name per variant, built from its three values; the float 1e5 is
# rendered as "100000.0".
dir_names = [
    "{}stpcnvgrd_{}_{}minrl".format(stop_grad, schedule, min_steps)
    for stop_grad, schedule, min_steps in values
]
keys = [
    ("model", "stop_conv_grad"),
    ("algo", "ul_update_schedule"),
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

# Command-line interface: either no arguments, or exactly two integer
# arguments MY_COMPUTER and NUM_COMPUTERS.
# NOTE(review): presumably these select this machine's share of the runs
# further down the script (not visible here) -- confirm.
args = sys.argv[1:]
if not args:
    # No arguments given: fall back to computer 0 of 1.
    my_computer, num_computers = 0, 1
elif len(args) == 2:
    # int() raises ValueError if an argument is not an integer literal.
    my_computer, num_computers = int(args[0]), int(args[1])
else:
    # Explicit check instead of `assert`: assertions are removed under
    # `python -O`, which would let a wrong argument count slip through.
    raise SystemExit(
        f"usage: {sys.argv[0]} [MY_COMPUTER NUM_COMPUTERS] "
        f"(expected 0 or 2 arguments, got {len(args)})"
    )

print(f"MY_COMPUTER: {my_computer},  NUM_COMPUTERS: {num_computers}")

# Settings for launching dmc_atc.py under the title "dmc_atc_pretrain_1".
# NOTE(review): presumably consumed by run_experiments further down the
# script (not visible here) -- confirm.
experiment_title = "dmc_atc_pretrain_1"
runs_per_setting = 1
script = "rlpyt/ul/experiments/ul_for_rl/scripts/dmcontrol/train_ul/dmc_atc.py"

# Affinity code requested with one context per GPU.
affinity_code = quick_affinity_code(contexts_per_gpu=1)

# Only the first variant-level list is active; variant_levels_2 is disabled.
variant_levels_1 = []


# Just standard settings.


# One replay buffer file per domain, all under a common base directory.
domains = ["ball_in_cup", "cartpole", "cheetah", "walker"]
replay_base_dir = "/data/adam/ul4rl/replays/20200715/rad_sac_replaysave84"

# Path layout: <replay_base_dir>/<domain>/run_0/replaybuffer.pkl
replay_filenames = [
    osp.join(replay_base_dir, domain, "run_0/replaybuffer.pkl")
    for domain in domains
]

# One (replay file, domain) tuple per domain.
values = list(zip(replay_filenames, domains))

# dir_names is the same list object as domains (an alias, not a copy).
dir_names = domains