import pickle, hashlib, os
import numpy as np
from common import common
from common import seeds, generate_exp, gym_interface

# Name and free-text description of this experiment; both are handed to
# common.shell_header() below.
exp_name = "801.3.train"
exp_description = """
For each type,
we randomly pick two robots, and train a RL policy on them, with the observation and action space aligned, we measure the learnability at 2e6 steps.
for the same two robots, we train another RL policy on them, with the observation and action space randomized, we measure the learnability at 2e6 steps.
We repeat the two experiments 10 times, so we have a comparison between aligned and randomized when training on 2 robots.
We then repeat the same procedures on 4 robots, 8 robots, 16 robots, and have comparisons for those conditions.
"""
common.shell_header(exp_name, exp_description)

# Module-level state that one_exp() declares as global.
g_all_jobs = []
g_ge = generate_exp.GenerateExp()
def one_exp(num_bodies=2, pick_bodies_from=[], method="aligned", seed=0, repeat=10):
    """Pick training bodies (and, if requested, a random alignment) for each run.

    Draws `repeat` run seeds in [0, 10000) under `seeds.temp_seed(seed)`, then,
    for every run seed, samples `num_bodies` distinct entries of
    `pick_bodies_from`, prints them, and joins them into a comma-separated
    string. When `method == "randomized"` a custom alignment is requested from
    `g_ge.construct_random_alignment`; otherwise the alignment stays "".

    Args:
        num_bodies: How many bodies to sample per run (without replacement).
        pick_bodies_from: Candidate bodies to sample from.
        method: "randomized" triggers a random alignment; any other value
            leaves the alignment string empty (the default, "aligned").
        seed: Seed used to derive the per-run seeds.
        repeat: Number of runs, i.e. number of run seeds drawn.

    NOTE(review): the visible body stops right after `custom_alignment` is
    computed; `str_body_selected` and `custom_alignment` are not used yet and
    nothing is returned. The remainder of the function appears to be missing
    from this listing -- confirm against the original file.
    NOTE(review): `pick_bodies_from=[]` is a mutable default argument; the
    visible code only reads it.
    """
    global g_all_jobs, g_ge
    # Only the first candidate body is passed here.
    # NOTE(review): presumably all candidates share the same joint count -- confirm.
    max_num_joints = gym_interface.get_max_num_joints(pick_bodies_from[:1])
    # Derive one seed per run from the experiment-level seed.
    with seeds.temp_seed(seed):
        run_seeds = np.random.randint(low=0, high=10000, size=[repeat])
    for run_seed in run_seeds:

        # The body selection is drawn under the run seed, so calls that share
        # the same seed/repeat should pick the same bodies regardless of
        # `method` (assuming temp_seed seeds NumPy's global RNG -- confirm).
        with seeds.temp_seed(run_seed):
            train_on_bodies = np.random.choice(pick_bodies_from, size=[num_bodies], replace=False)
        print("\n# train on bodies: ", train_on_bodies)
        str_body_selected = ','.join([str(x) for x in train_on_bodies])
        custom_alignment = "" # default is aligned
        if method=="randomized":
            # The run seed is reused so the random alignment is tied to this run.
            custom_alignment = g_ge.construct_random_alignment(num_bodies=num_bodies, max_joints=max_num_joints, seed=run_seed)
import pickle
from stable_baselines3 import PPO
from common import common
from common import pns
import numpy as np
# Emit one sbatch command per random seed for the GPU permutation experiment
# and pickle the list of submitted jobs for later bookkeeping.
exp_name = "PermutationFromGradientGPU"
# Job descriptors collected in the loop below and pickled at the end.
all_jobs = []
common.shell_header(exp_name, "Use pns and pns_init, so we can have gradient for pns weights, we regulate the weight, align them to permutation matrix when needed.")
np.random.seed(1) # CPU version used 0.
# Arguments shared by every submitted run.
common_argument = f"--train_bodies=100,101,102,103,104,105,106,107 --test_bodies=100,101,102,103,104,105,106,107 --pns --pns_init --train_steps=1e7 --tensorboard=tensorboard/{exp_name}"

# Five run seeds in [0, 100000), reproducible thanks to the fixed seed above.
seeds = np.random.randint(low=0, high=100000, size=[5])
for seed in seeds:
    cmd = f"sbatch -J {exp_name} submit-gpu.sh python 1.train.py --seed={seed} {common_argument}"
    print(cmd)
    job = {
        "seed": seed,
    }
    # Record the job so the pickled list holds one entry per printed command
    # (without this the file would always contain an empty list).
    all_jobs.append(job)

common.shell_tail()
with open(f"output_data/tmp/all_jobs_{exp_name}.pickle", "wb") as f:
    pickle.dump(all_jobs, f)
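# ---- Example #3 ----
# (The bare "0" that followed this heading in the listing was a page artifact, not code.)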
import pickle
from stable_baselines3 import PPO
from common import common
from common import pns
import numpy as np
# Load a trained PPO model and read a joint ordering out of each of its pns
# weight matrices, stored both as-is and as its argsort ("reverse") form.
exp_name = "PossibleOrder"
common.shell_header(
    exp_name,
    "Extract permutation from successful PNS, see if it will beat 10 random controls."
)
all_jobs = []

args = common.args
args.model_filename = "output_data/tmp/best_model.zip"
model = PPO.load(args.model_filename)
# print(model)
# One comma-separated string of integers per pns module.
orders = []
orders_reverse = []
# NOTE(review): 8 is hard-coded; presumably the number of pns modules in the
# loaded policy -- confirm.
for i in range(8):
    # print(f"weight[{i}]")
    # .cpu() is required before .numpy(): Tensor.numpy() only works on CPU
    # tensors, and PPO.load() may place the policy on a GPU when one is
    # available. On a CPU tensor .cpu() is a no-op.
    weights = model.policy.features_extractor.pns[i].weight.detach().cpu().numpy()
    # presumably snaps the learned weights to a permutation matrix -- see pns.permutation_matrix
    weights_p = pns.permutation_matrix(weights)
    # print(weights_p)
    # Apply the matrix to [0, 1, ..., n-1] to read the ordering it encodes.
    order = np.dot(weights_p, list(range(weights.shape[0])))
    order_reverse = np.argsort(order)
    orders.append(','.join([str(int(x)) for x in order]))
    orders_reverse.append(','.join([str(int(x)) for x in order_reverse]))
    # These two orders happen to be the same, because there is only one swap. But I am not sure which order is what we need.
    # print(order)
    # print("===========")