Example #1
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import lab
    from lab import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = lab.make(env_id)
    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))

            if record:
                env.close()
                return
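
Example #1's main takes the environment id, policy file, and flags as plain arguments. Below is a minimal sketch of how it might be wired to a command line, assuming a click-based CLI; the decorator layout and option names are illustrative, not taken from the original script.

import click

@click.command()
@click.argument('env_id')
@click.argument('policy_file')
@click.option('--record', is_flag=True, help='Save a Monitor recording under /tmp')
@click.option('--stochastic', is_flag=True, help='Sample actions instead of acting deterministically')
@click.option('--extra_kwargs', default=None, help='JSON string forwarded to MujocoPolicy.Load')
def cli(env_id, policy_file, record, stochastic, extra_kwargs):
    # Delegates to the main() shown in Example #1.
    main(env_id, policy_file, record, stochastic, extra_kwargs)

if __name__ == '__main__':
    cli()
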
Example #2
def setup_policy(cls, policy_file, thisData, noise_stdev, path):
    # Extracted from a class (note the `cls` argument). `gs.noise` is assumed to be a
    # module-level shared noise table; `thisData` carries the offspring's noise index and sign.
    from es_distributed.policies import MujocoPolicy
    pi = MujocoPolicy.Load(policy_file, extra_kwargs=None)
    if not thisData.parentOrNot:
        noiseIdx, noiseSign = thisData.child_op_data[1:3].astype(int)
        theta = pi.get_trainable_flat() + noiseSign * noise_stdev * gs.noise.get(noiseIdx, pi.num_params)
        pi.set_trainable_flat(theta)
    return pi
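
Example #2 relies on a gs.noise object with a get(index, dim) method. Below is a minimal sketch of such a shared noise table, assuming the pattern common to distributed ES implementations; the real table in es_distributed is preallocated, far larger, and typically lives in shared memory.

import numpy as np

class SharedNoiseTableSketch:
    # Illustrative stand-in for gs.noise; not the es_distributed implementation.
    def __init__(self, size=1_000_000, seed=123):
        # One long vector of Gaussian noise that every worker indexes into.
        self.noise = np.random.RandomState(seed).randn(size).astype(np.float32)

    def get(self, i, dim):
        # A perturbation is a contiguous slice of length `dim` starting at index `i`.
        return self.noise[i:i + dim]
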
Example #3
def main():
    # Imports assumed from context; they are not present in the original snippet.
    import gym
    import roboschool  # registers the Roboschool environments with gym
    import numpy as np
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    from es_distributed.es import RunningStat

    sess = tf.InteractiveSession()

    env = gym.make("RoboschoolAnt-v1")

    ac_space = env.action_space
    ob_space = env.observation_space

    policy = MujocoPolicy(ob_space, ac_space, "uniform:10", 0.01, 'tanh', [256, 256], 'ff')

    ob_stat = RunningStat(
        env.observation_space.shape,
        eps=1e-2  # eps to prevent dividing by zero at the beginning when computing mean/stdev
    )

    policy.set_ob_stat(ob_stat.mean, ob_stat.std)
    ob = np.ones(ob_space.shape)  # env.reset()
    sess.run(tf.initialize_variables(tf.all_variables()))

    a = policy.act(ob[None])
    print(a)
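
Examples #3 and #4 feed RunningStat.mean and RunningStat.std into set_ob_stat to normalize observations. Below is a minimal sketch of what such a running-statistics tracker provides, assuming a sum/sum-of-squares formulation; the actual class lives in es_distributed and may differ in detail.

import numpy as np

class RunningStatSketch:
    # Illustrative observation-statistics tracker; not the es_distributed class.
    def __init__(self, shape, eps):
        self.sum = np.zeros(shape, dtype=np.float64)
        self.sumsq = np.full(shape, eps, dtype=np.float64)
        self.count = eps  # eps keeps the std well-defined before any data arrives

    def increment(self, s, ssq, c):
        # Accumulate batch sums, batch sums of squares, and the batch size.
        self.sum += s
        self.sumsq += ssq
        self.count += c

    @property
    def mean(self):
        return self.sum / self.count

    @property
    def std(self):
        return np.sqrt(np.maximum(self.sumsq / self.count - np.square(self.mean), 1e-2))
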
Example #4
def main():
    """
    Tests whether the outputs of the Keras model are equal (or similar) to those of the original
    implementation, which used plain TensorFlow operations.

    This main function runs 1000 rollouts of the version built from plain TensorFlow operations.
    The code in the function jupyter_cell can be run in the training Jupyter Notebook.
    :return:
    """
    # Imports assumed from context; they are not present in the original snippet.
    import gym
    import roboschool  # registers the Roboschool environments with gym
    import numpy as np
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    from es_distributed.es import RunningStat

    env = gym.make("RoboschoolAnt-v1")

    ac_space = env.action_space
    ob_space = env.observation_space

    ob_stat = RunningStat(
        env.observation_space.shape,
        eps=1e-2  # eps to prevent dividing by zero at the beginning when computing mean/stdev
    )
    rews, lens = [], []
    for _ in range(1000):
        sess = tf.InteractiveSession()
        policy = MujocoPolicy(ob_space, ac_space, "continuous:", 0.01, 'tanh',
                              [256, 256], 'ff')
        policy.set_ob_stat(ob_stat.mean, ob_stat.std)
        sess.run(tf.initialize_variables(tf.all_variables()))
        r, t = policy.rollout(env)
        rews.append(r.sum())
        lens.append(t)
        tf.reset_default_graph()
        sess.close()

    print("RewMean", np.mean(rews))
    print("RewStd", np.std(rews))
    print("LenMean", np.mean(lens))
Example #5
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy, ESAtariPolicy, GAAtariPolicy, GAGenesisPolicy
    from es_distributed.atari_wrappers import ScaledFloatFrame, wrap_deepmind
    from es_distributed.es import get_ref_batch
    import numpy as np
    max_episode_steps = 4500
    # register retro games with max steps per episode to be played out
    # (register_all and sonic_util come from modules not shown in this snippet)
    register_all(max_episode_steps=max_episode_steps)

    is_atari_policy = True

    env = gym.make(env_id)
    env = sonic_util.sonicize_env(env)

    #if is_atari_policy:
    # env = wrap_deepmind(env)

    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        if is_atari_policy:
            pi = GAGenesisPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        #   pi.set_ref_batch(get_ref_batch(env, batch_size=128))
        else:
            pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)

        while True:
            if is_atari_policy:
                rews, t, score = pi.rollout(
                    env,
                    render=True,
                    random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))
            print(score)
            if record:
                env.close()
                return
Example #6
def main(env_ids, policy_directory, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy, ESAtariPolicy, GAAtariPolicy
    from es_distributed.atari_wrappers import ScaledFloatFrame, wrap_deepmind
    from es_distributed.es import get_ref_batch
    import es_distributed.ns as ns
    import numpy as np
    import os

    env_ids = env_ids.split(' ')

    is_atari_policy = "NoFrameskip" in env_ids[0]

    files = 0

    for policy_name in os.listdir(policy_directory):
        files += 1
        policy_file = "%s/%s" % (policy_directory, policy_name)
        pid = os.fork()
        if (pid == 0):
            env = []
            for i in range(0, len(env_ids)):
                env.append(gym.make(env_ids[i]))
                if env_ids[i].endswith('NoFrameskip-v4'):
                    env[i] = wrap_deepmind(env[i])

            if extra_kwargs:
                import json
                extra_kwargs = json.loads(extra_kwargs)

            with tf.Session():
                if is_atari_policy:
                    pi = GAAtariPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
                    if pi.needs_ref_batch:
                        pi.set_ref_batch(get_ref_batch(env[0], batch_size=128))
                else:
                    pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)

                while True:
                    if is_atari_policy:
                        rews, t, novelty_vector = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)

    for i in range(0, files):
        os.wait()
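
Example #6 fans the policies out with os.fork and then reaps the children with os.wait. Below is a stripped-down sketch of that parent/child pattern, with hypothetical file names; the real children load a policy and run rollouts instead of exiting immediately.

import os

children = []
for policy_name in ['policy_a.h5', 'policy_b.h5']:  # hypothetical file names
    pid = os.fork()
    if pid == 0:
        # Child process: do the per-policy work here (load policy, roll out), then exit.
        print('child %d handles %s' % (os.getpid(), policy_name))
        os._exit(0)
    children.append(pid)

# Parent process: block until every child has terminated.
for _ in children:
    os.wait()
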
Example #7
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy, ESAtariPolicy, GAAtariPolicy
    from es_distributed.atari_wrappers import ScaledFloatFrame, wrap_deepmind
    from es_distributed.es import get_ref_batch
    import numpy as np

    is_atari_policy = "NoFrameskip" in env_id

    env = gym.make(env_id)
    if is_atari_policy:
        env = wrap_deepmind(env)

    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        if is_atari_policy:
            pi = GAAtariPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
            # Commented out the policy below, as it is not the policy that we use.
            # pi = ESAtariPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
            # pi.set_ref_batch(get_ref_batch(env, batch_size=128))
        else:
            pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)

        while True:
            if is_atari_policy:
                rews, t, novelty_vector = pi.rollout(
                    env,
                    render=True,
                    random_stream=np.random if stochastic else None)
            else:
                # Without this branch `rews` and `t` are undefined on the MujocoPolicy path.
                rews, t = pi.rollout(
                    env,
                    render=True,
                    random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))

            if record:
                env.close()
                return