def load_expert_policies(self, sess):
    """Restore one pre-trained expert policy per task from pickled weights."""
    for task in range(self.numExpertPolicies):
        print("######LOADING EXPERT " + str(task) + "##############")

        policy = GaussianMLPPolicy(name='expert' + str(task),
                                   env_spec=self.env.spec,
                                   hidden_nonlinearity=tf.nn.relu,
                                   hidden_sizes=(100, 100))
        with open(self.expertDataLoc + "Task_" + str(task) + "/itr_" +
                  str(self.expertDataItr) + ".pkl", 'rb') as f:
            weights = pickle.load(f)
        # Copy the saved mean-network and log-std parameters into the new policy.
        for key in policy.mean_params:
            sess.run(
                tf.assign(policy.mean_params[key],
                          weights['mean_params'][key]))
        sess.run(tf.assign(policy.std_params, weights['std_params']))
        self.expertPolicies[task] = policy
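A minimal sanity check after loading, sketched below under the assumption that the enclosing class also exposes self.env and that these policies implement the standard rllab get_action interface (neither is shown above):

obs = self.env.reset()
for task in range(self.numExpertPolicies):
    # Hypothetical check: each restored expert should return an action for a fresh observation.
    action, _ = self.expertPolicies[task].get_action(obs)
    print("expert", task, "first action:", action)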
Example #2
        exp_prefix = 'posticml_trpo_maml_ant' + str(max_path_length)
        if oracle:
            env = TfEnv(normalize(AntEnvOracle()))
        else:
            env = TfEnv(normalize(AntEnvRand()))
    elif task_var == 2:
        print("HERE")
        task_var = 'pos'
        exp_prefix = 'posticml_trpo_maml_antpos_' + str(max_path_length)
        if oracle:
            env = TfEnv(normalize(AntEnvRandGoalOracle()))
        else:
            env = TfEnv(normalize(AntEnvRandGoal()))
    policy = GaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=(100, 100),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=max_path_length * 100,  # number of trajs for grad update
        max_path_length=max_path_length,
        n_itr=2000,
        use_maml=use_maml,
        step_size=0.01,
        plot=False,
    )
Example #3
for step_i, initial_params_file in zip(range(len(step_sizes)), initial_params_files):
    avg_returns = []
    for goal in goals:
        goal = list(goal)


        # An 'oracle' checkpoint is evaluated as-is for a single iteration; otherwise the
        # pre-trained policy is fine-tuned on the sampled goal for a few VPG iterations.
        if initial_params_file is not None and 'oracle' in initial_params_file:
            env = normalize(PointEnvRandGoalOracle(goal=goal))
            n_itr = 1
        else:
            env = normalize(PointEnvRandGoal(goal=goal))
            n_itr = 5
        env = TfEnv(env)
        policy = GaussianMLPPolicy(  # random policy
            name='policy',
            env_spec=env.spec,
            hidden_sizes=(100, 100),
        )


        # When a params file is given, drop the fresh policy and let VPG restore the
        # saved one via load_policy below.
        if initial_params_file is not None:
            policy = None

        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
            policy=policy,
            load_policy=initial_params_file,
            baseline=baseline,
            batch_size=4000,  # 2x
            max_path_length=100,
Example #4
all_avg_returns = []
for step_i, initial_params_file in zip(range(len(step_sizes)), initial_params_files):
    avg_returns = []
    for goal_i, goal in zip(range(len(goals)), goals):


        if initial_params_file is not None and 'oracle' in initial_params_file:
            env = normalize(AntEnvDirecOracle())
            n_itr = 1
        else:
            env = normalize(AntEnvRandDirec())
            n_itr = 4
        env = TfEnv(env)
        policy = GaussianMLPPolicy(  # random policy
            name='policy',
            env_spec=env.spec,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
        )

        if initial_params_file is not None:
            policy = None

        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
            policy=policy,
            load_policy=initial_params_file,
            baseline=baseline,
            batch_size=8000,
            max_path_length=200,
            n_itr=n_itr,
Example #5
from rllab.envs.normalized_env import normalize
from rllab.misc.instrument import stub, run_experiment_lite

stub(globals())
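# stub(globals()) replaces the classes below with stubs, so the constructor calls build a
# serializable experiment description that run_experiment_lite later executes.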

import tensorflow as tf

#env = normalize(PointEnvRandGoal())
env = normalize(PointEnvRandGoalOracle())
#env = normalize(HalfCheetahEnv())
#env = normalize(Walker2DEnv())
env = TfEnv(env)
policy = GaussianMLPPolicy(
    name='policy',
    env_spec=env.spec,
    # The neural network policy should have two hidden layers, each with 100 hidden units.
    #hidden_sizes=(32, 32)
    #hidden_nonlinearity=tf.nn.relu,
    hidden_sizes=(100, 100))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=500,  # was 4k
    max_path_length=5,
    n_itr=100,
    discount=0.99,
    step_size=0.01,
Example #6
from rllab.misc.instrument import stub, run_experiment_lite

stub(globals())

#env = normalize(SwimmerEnv())
env = normalize(SwimmerRandGoalOracleEnv())
#env = normalize(SwimmerRandGoalEnv())

max_path_length = 100
#env = normalize(HalfCheetahEnv())
#env = normalize(Walker2DEnv())
if use_tf:
    env = TfEnv(env)
    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 100 hidden units.
        #hidden_sizes=(32, 32)
        hidden_sizes=(100, 100))
else:
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 100 hidden units.
        hidden_sizes=(100, 100))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=max_path_length * 10,  # was 4k
Example #7
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from examples.point_env import PointEnv
from examples.point_env_randgoal import PointEnvRandGoal
from rllab.envs.normalized_env import normalize
from rllab.misc.instrument import stub, run_experiment_lite
#from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
#from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
from sandbox.rocky.tf.policies.minimal_gauss_mlp_policy import GaussianMLPPolicy
from sandbox.rocky.tf.envs.base import TfEnv

stub(globals())

env = TfEnv(normalize(PointEnv()))
#env = TfEnv(normalize(PointEnvRandGoal()))
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
)
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    #plot=True,
)
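# run_experiment_lite launches the stubbed algo.train() call in a separate process;
# snapshot_mode="last" keeps only the final iteration's snapshot.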
run_experiment_lite(
    algo.train(),
    n_parallel=1,
    snapshot_mode="last",
    seed=1,
    #plot=True,
)
Example #8
for step_i, initial_params_file in zip(range(len(step_sizes)), initial_params_files):
    avg_returns = []

    for goal in goals:
        print('goal =', goal / 3.141593 * 180)  # convert radians to degrees
        if initial_params_file is not None and 'oracle' in initial_params_file:
            env = normalize(AntEnvOracle())
            n_itr = 1
        else:
            env = normalize(CellRobotRandDirectEnv())
            n_itr = 5
        env = TfEnv(env)
        policy = GaussianMLPPolicy(  # random policy
            name='policy',
            env_spec=env.spec,
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.sigmoid,
            hidden_sizes=(64, 64),
        )
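        # output_nonlinearity=tf.nn.sigmoid bounds the mean action to (0, 1), presumably to
        # match this robot's normalized action range.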
        

        if initial_params_file is not None:
            policy = None

        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = VPG(
            env=env,
            policy=policy,
            load_policy=initial_params_file,
            baseline=baseline,
            batch_size=400,  # 2x
Example #9
stub(globals())
oracle = False
random = True

if oracle:
    env = TfEnv(normalize(SwimmerRandGoalOracleEnv()))
    batch_size = 200
elif random:
    env = TfEnv(normalize(SwimmerRandGoalEnv()))
    batch_size = 200
else:
    env = TfEnv(normalize(SwimmerEnv()))
    batch_size = 20
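# Here batch_size counts trajectories; the VPG batch_size below is in environment steps
# (max_path_length of 500 per trajectory).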
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    hidden_sizes=(100, 100),
)
baseline = LinearFeatureBaseline(env_spec=env.spec)
#baseline = ZeroBaseline(env_spec=env.spec)
algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=500*batch_size,
    max_path_length=500,
    n_itr=500,
    #plot=True,
    optimizer_args={'tf_optimizer_args':{'learning_rate': 1e-3}},
)
run_experiment_lite(