Example #1
def play():
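    # load_policy returns (env, get_action); only the action function is needed here.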
    _, get_action = load_policy(SAVE_PATH)
    env = SawyerGraspEnv(n_substeps=5)
    n_episode = 10
    ep_len, ep_ret, i = 0, 0, 0
    max_ep_len = 1000
    obs = env.reset()

    while i < n_episode:
        action = get_action(obs)
        print('Action: {}'.format(action))
        obs, r, d, reward_info = env.step(action)
        # print('Control: {}'.format(env.sim.data.ctrl))
        ep_len += 1
        ep_ret += r
        print('dist_reward: {} grasp_reward: {} terminal_reward: {}'.format(reward_info['dist_reward'],
                                                                            reward_info['grasp_reward'],
                                                                            reward_info['terminal_reward']))
        env.render()
        if d or (ep_len == max_ep_len):
            print('DONE: Episode Length: {} Episode Reward {}'.format(ep_len, ep_ret))
            obs = env.reset()
            ep_len, ep_ret, r = 0, 0, 0
            d = False
            i += 1
Example #2
import argparse

import gym
from spinup.utils.test_policy import load_policy, run_policy

# 'HumanoidRL-v0' is a custom env; the package that registers it must also be imported.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--file",
        type=str,
        default="",
    )
    args = parser.parse_args()
    _, get_action = load_policy(args.file)
    env = gym.make('HumanoidRL-v0')
    run_policy(env, get_action)
Example #3
def play():
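    # Assumes save_path and env are defined at module scope.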
    _, get_action = load_policy(save_path)
    n_episode = 10
    ep_len, ep_ret, i = 0, 0, 0
    max_ep_len = 500
    obs = env.reset()

    while i < n_episode:
        action = get_action(obs)
        # print('Action: {}'.format(action))
        obs, r, d, reward_info = env.step(action)
        # print('Control: {}'.format(env.sim.data.ctrl))
        ep_len += 1
        ep_ret += r

        env.render()
        if d or (ep_len == max_ep_len):
            print('DONE: Episode Length: {} Episode Reward {}'.format(
                ep_len, ep_ret))
            obs = env.reset()
            ep_len, ep_ret, r = 0, 0, 0
            d = False
            i += 1
Example #4
import os

from spinup.utils.test_policy import load_policy, run_policy, \
    load_policy_and_env

# Register custom envs
import gym_match_input_continuous
import deepdrive_zero
import gym

TEST_STATIC_OBSTACLE = False


# TODO: Move configs from env to python that train and test both use

if TEST_STATIC_OBSTACLE:
    _, get_action = load_policy(
        '/home/c2/src/spinningup/data/dd2d-ppo-intersection/dd2d-ppo-intersection_s0',
        use_model_only=False)

    env = gym.make('deepdrive-2d-static-obstacle-no-g-pen-v0')
else:
    p = '/home/c2/src/tmp/spinningup/data/deepdrive-2d-intersection-no-constrained-controls-example/deepdrive-2d-intersection-no-constrained-controls-example_s0_2020_03-10_13-14.50/best_HorizonReturn/2020_03-11_11-36.27'
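    # Derive env configuration from flags embedded in the checkpoint directory name.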
    if 'no-end-g' in p or 'no-contraint-g' in p or 'no-g' in p or 'no-constrain' in p:
        os.environ['END_ON_HARMFUL_GS'] = '0'
        os.environ['GFORCE_PENALTY_COEFF'] = '0'
        os.environ['JERK_PENALTY_COEFF'] = '0'
    if 'no-constrain' in p:
        os.environ['CONSTRAIN_CONTROLS'] = '0'
    if 'delta-controls' in p or 'deepdrive-2d-intersection-no-g-or-jerk2' in p:
        os.environ['EXPECT_NORMALIZED_ACTION_DELTAS'] = '1'
    else:
        os.environ['EXPECT_NORMALIZED_ACTION_DELTAS'] = '0'
Example #5
from spinup.utils.test_policy import load_policy, run_policy

modelpath = '/home/watate/[email protected]/Python/spinningup/data/ex2_ddpg_10ep_mountaincarcontinuous-v0/ex2_ddpg_10ep_mountaincarcontinuous-v0_s0'

max_ep_len = 0  # episode length cap passed to run_policy; 0 effectively disables it
episodes = 100
norender = False
itr = -1

# Only for Soft Actor-Critic
deterministic = False

# This part is unnecessary because load_policy already restores the TF graph
#model = spinup.utils.logx.restore_tf_graph(sess, modelpath)

env, get_action = load_policy(modelpath, itr if itr >= 0 else 'last',
                              deterministic)
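# itr='last' loads the most recently saved checkpoint.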

#run_policy(env, get_action, max_ep_len, episodes, not norender)
'''
for i_episode in range(20):
    observation = env.reset()
    while(True): #for t in range(100):
        env.render()
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
env.close()
'''

for i_episode in range(episodes):
    observation = env.reset()
    done = False
    while not done:
        env.render()
        action = get_action(observation)
        observation, reward, done, info = env.step(action)
env.close()
Example #6
from spinup.utils.test_policy import load_policy, run_policy
import gym
import gym_foo
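# Importing gym_foo registers its custom environments (such as 'pro-v0') with gym.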
_, get_action = load_policy('./output')
env = gym.make('pro-v0')
run_policy(env, get_action)
Example #7
from spinup.utils.test_policy import load_policy, run_policy
import envs
import gym

policy_name = 'ppo-100e-012-imgclass_as_input'
_, get_action = load_policy('./data/{}/{}_s0'.format(policy_name, policy_name))
env = gym.make('MNISTClassEnv-v0', desired_outputs=[0, 1, 2])
run_policy(env, get_action)
Example #8
def __init__(self):
    # Load the saved policy once and cache its action function.
    _, self.get_action = load_policy('log2/')
Example #9
from spinup.utils.test_policy import load_policy, run_policy
import gym
import gym_geofriend2

_, get_action = load_policy('./spinupPpo')
env = gym.make("geofriend2-v0")  #Pyramid(),
run_policy(env, get_action, max_ep_len=200)
Example #10
from spinup.utils.test_policy import load_policy, run_policy
import rocket_lander_gym
import gym

_, get_action = load_policy('oupput')
env = gym.make('RocketLander-v0')
run_policy(env, get_action)
Example #11
from spinup.utils.test_policy import load_policy, run_policy
import mycartCont
import sys

dirname = sys.argv[1]  # first command-line argument: path to the saved policy

_, get_action = load_policy(dirname)
env = mycartCont.MyCartContEnv()
run_policy(env, get_action)
Example #12
from spinup.utils.test_policy import load_policy, run_policy

import numpy as np
import train

_, get_action = load_policy("output_dir2")

double = train.Doubling()
o = np.array([
    0, 2, 0, 0, 0, 3, 0, 3, 0, 0, 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,
    0, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0
])
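# Query the trained policy for a single hand-crafted observation.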
a = get_action(o)
print(a)
#run_policy(double, get_action)
Example #13
from spinup.utils.test_policy import load_policy, run_policy
import gym
import argparse

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, required=True)
    parser.add_argument('--save_dir', type=str, required=True)
    parser.add_argument("-d", '--deterministic', action="store_true")
    parser.add_argument("-e", "--episodes", type=int, default=100)
    parser.add_argument("-dr", "--dont_render", action="store_false")
    args = parser.parse_args()

    # --dont_render uses store_false, so it is True by default and becomes False with -dr.
    render_it = args.dont_render

    print(args.save_dir)

    _, get_action = load_policy(args.save_dir,
                                deterministic=args.deterministic)
    env = gym.make(args.env)
    run_policy(env, get_action, num_episodes=args.episodes, render=render_it)
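    # Example invocation (hypothetical script name and save_dir):
    #   python play_policy.py --env LunarLander-v2 --save_dir ./data/run_s0 -e 10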