Пример #1
0
def main():
    """Replay a saved policy in the ``HumanoidRL-v0`` environment.

    The policy location comes from the ``--file`` command-line option,
    which falls back to an empty string when the option is omitted.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--file", type=str, default="")
    options = cli.parse_args()

    # Only the action function is used; the first element returned by
    # load_policy is discarded.
    _, get_action = load_policy(options.file)
    run_policy(gym.make('HumanoidRL-v0'), get_action)
Пример #2
0
def run(train_fn, env_config, net_config=None):
    """Dispatch on the first command-line argument.

    * no arguments or ``--train``: call ``train_fn()``.
    * ``--test [model_path]``: build the environment named by
      ``env_config['env_name']``, load the policy from ``model_path``
      (or from ``get_latest_model_path()`` when no path is given) and
      roll it out with ``run_policy``.

    Any other first argument is ignored and the function returns without
    doing anything.
    """
    cli_args = sys.argv[1:]
    mode = cli_args[0] if cli_args else '--train'

    if mode == '--train':
        train_fn()
        return
    if mode != '--test':
        return

    # An explicit path on the command line wins over the latest model.
    model_path = cli_args[1] if len(cli_args) > 1 else get_latest_model_path()  # TODO
    env = gym.make(env_config['env_name'])
    _, get_action = load_policy_and_env(
        fpath=model_path,
        deterministic=True,
        env=env,
        net_config=net_config,
    )
    run_policy(env, get_action)
Пример #3
0
def run(train_fn, env_config, net_config=None):
    """Dispatch on the first command-line argument.

    * no arguments or ``--train``: call ``train_fn()``.
    * ``--play``: set ``env_config['physics_steps_per_observation']`` to 1
      (the caller's dict is modified in place) and start the
      ``deepdrive_zero`` player with that config.
    * ``--test [model_path]``: build and configure the environment named by
      ``env_config['env_name']``, load the policy from ``model_path``
      (or from ``get_latest_model_path()`` when no path is given) and
      roll it out with ``run_policy``.

    Any other first argument is ignored and the function returns without
    doing anything.
    """
    cli_args = sys.argv[1:]
    mode = cli_args[0] if cli_args else '--train'

    if mode == '--train':
        train_fn()
        return

    if mode == '--play':
        # Imported lazily so the player module is only loaded in this mode.
        from deepdrive_zero import player
        env_config['physics_steps_per_observation'] = 1
        player.start(env_config=env_config)
        return

    if mode != '--test':
        return

    # An explicit path on the command line wins over the latest model.
    model_path = cli_args[1] if len(cli_args) > 1 else get_latest_model_path()  # TODO
    env = gym.make(env_config['env_name'])
    env.configure_env(env_config)
    _, get_action = load_policy_and_env(
        fpath=model_path,
        deterministic=True,
        env=env,
        net_config=net_config,
    )
    run_policy(env, get_action)
# Pick a saved policy and an environment, then roll the policy out.
if TEST_STATIC_OBSTACLE:
    _, get_action = load_policy(
        '/home/c2/src/spinningup/data/dd2d-ppo-intersection/dd2d-ppo-intersection_s0',
        use_model_only=False,
    )
    env = gym.make('deepdrive-2d-static-obstacle-no-g-pen-v0')
else:
    p = '/home/c2/src/tmp/spinningup/data/deepdrive-2d-intersection-no-constrained-controls-example/deepdrive-2d-intersection-no-constrained-controls-example_s0_2020_03-10_13-14.50/best_HorizonReturn/2020_03-11_11-36.27'

    # Environment options are derived from substrings of the experiment path
    # and exported as environment variables before the policy is loaded and
    # the environment is created. The 'no-contraint-g' spelling is kept
    # exactly as it was, since it is matched against the path at runtime.
    if any(tag in p for tag in ('no-end-g', 'no-contraint-g', 'no-g', 'no-constrain')):
        os.environ.update({
            'END_ON_HARMFUL_GS': '0',
            'GFORCE_PENALTY_COEFF': '0',
            'JERK_PENALTY_COEFF': '0',
        })
    if 'no-constrain' in p:
        os.environ['CONSTRAIN_CONTROLS'] = '0'
    os.environ['EXPECT_NORMALIZED_ACTION_DELTAS'] = (
        '1'
        if 'delta-controls' in p or 'deepdrive-2d-intersection-no-g-or-jerk2' in p
        else '0'
    )

    env_name = (
        'deepdrive-2d-one-waypoint-v0'
        if 'one-waypoint' in p
        else 'deepdrive-2d-intersection-w-gs-allow-decel-v0'
    )
    _, get_action = load_policy_and_env(p, deterministic=True)
    # Alternative environment: gym.make('deepdrive-2d-intersection-v0')
    env = gym.make(env_name)
    env.configure_env()

# Optional override: env.unwrapped.physics_steps_per_observation = 1
run_policy(env, get_action)
def test_ppo(exp_dir, itr='last'):
    """Roll out the policy saved in ``exp_dir`` in a hide-and-seek environment.

    ``itr`` is forwarded to ``load_policy_and_env`` to select which saved
    iteration to load (``'last'`` by default). The third value returned by
    the loader is passed on to ``run_policy`` as its ``lstm`` argument.
    """
    _, act_fn, lstm_state = load_policy_and_env(exp_dir, itr=itr)
    game_env = hide_and_seek.make_env()
    run_policy(game_env, act_fn, lstm=lstm_state)
Пример #6
0
import gym
import spinup
import tensorflow as tf
import spinup.utils.logx
from spinup.utils.test_policy import load_policy, run_policy
'''
# METHOD 1 (See Docs)
# BipedalWalker-v2
modelpath = '/home/watate/[email protected]/Python/spinningup/data/ex5_ddpg_100ep_bipedalwalker-v2/ex5_ddpg_100ep_bipedalwalker-v2_s0'

len = 0
episodes = 100
norender = False
#itr = -1
itr = -100

#Only for soft-actor critic
deterministic = False


# This part is unnecessary because load_policy already restores the tf graph
#model = spinup.utils.logx.restore_tf_graph(sess, modelpath)


env, get_action = load_policy(modelpath, 
                                itr if itr >=0 else 'last',
                              deterministic)

run_policy(env, get_action, len, episodes, not(norender))
'''
Пример #7
0
from spinup.utils.test_policy import load_policy, run_policy
import gym
import gym_geofriend2

# Load the policy saved under ./spinupPpo and roll it out in the
# geofriend2-v0 environment, passing max_ep_len=200 to run_policy.
_, get_action = load_policy('./spinupPpo')
env = gym.make('geofriend2-v0')  # original note: Pyramid(),
run_policy(env, get_action, max_ep_len=200)
Пример #8
0
from spinup.utils.test_policy import load_policy, run_policy
import gym
import argparse

if __name__ == "__main__":
    # Command-line entry point: load a saved policy and roll it out in the
    # requested Gym environment.
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, required=True,
                        help="environment id passed to gym.make")
    parser.add_argument('--save_dir', type=str, required=True,
                        help="directory passed to load_policy")
    parser.add_argument("-d", '--deterministic', action="store_true",
                        help="forwarded to load_policy as deterministic=True")
    parser.add_argument("-e", "--episodes", type=int, default=100,
                        help="number of episodes to run")
    # Rendering is on by default; -dr/--dont_render turns it off. The value
    # is stored under `render` so it can be forwarded without inversion.
    parser.add_argument("-dr", "--dont_render", dest="render",
                        action="store_false",
                        help="disable rendering")
    args = parser.parse_args()

    print(args.save_dir)

    _, get_action = load_policy(args.save_dir,
                                deterministic=args.deterministic)
    env = gym.make(args.env)
    run_policy(env, get_action, num_episodes=args.episodes,
               render=args.render)