def play():
    # Assumes load_policy, SawyerGraspEnv, and SAVE_PATH are imported/defined elsewhere in this module.
    _, get_action = load_policy(SAVE_PATH)
    env = SawyerGraspEnv(n_substeps=5)
    n_episode = 10
    ep_len, ep_ret, i = 0, 0, 0
    max_ep_len = 1000
    obs = env.reset()
    while i < n_episode:
        action = get_action(obs)
        print('Action: {}'.format(action))
        obs, r, d, reward_info = env.step(action)
        # print('Control: {}'.format(env.sim.data.ctrl))
        ep_len += 1
        ep_ret += r
        print('dist_reward: {} grasp_reward: {} terminal_reward: {}'.format(
            reward_info['dist_reward'], reward_info['grasp_reward'],
            reward_info['terminal_reward']))
        env.render()
        if d or (ep_len == max_ep_len):
            print('DONE: Episode Length: {} Episode Reward {}'.format(ep_len, ep_ret))
            obs = env.reset()
            ep_len, ep_ret, r = 0, 0, 0
            d = False
            i += 1
import argparse

import gym
from spinup.utils.test_policy import load_policy, run_policy


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", type=str, default="")
    args = parser.parse_args()
    _, get_action = load_policy(args.file)
    env = gym.make('HumanoidRL-v0')
    run_policy(env, get_action)
def play():
    # Assumes load_policy, env, and save_path are defined elsewhere in this module.
    _, get_action = load_policy(save_path)
    n_episode = 10
    ep_len, ep_ret, i = 0, 0, 0
    max_ep_len = 500
    obs = env.reset()
    while i < n_episode:
        action = get_action(obs)
        # print('Action: {}'.format(action))
        obs, r, d, reward_info = env.step(action)
        # print('Control: {}'.format(env.sim.data.ctrl))
        ep_len += 1
        ep_ret += r
        env.render()
        if d or (ep_len == max_ep_len):
            print('DONE: Episode Length: {} Episode Reward {}'.format(
                ep_len, ep_ret))
            obs = env.reset()
            ep_len, ep_ret, r = 0, 0, 0
            d = False
            i += 1
import os

from spinup.utils.test_policy import load_policy, run_policy, \
    load_policy_and_env

# Register custom envs
import gym_match_input_continuous
import deepdrive_zero
import gym

TEST_STATIC_OBSTACLE = False

# TODO: Move configs from env to python that train and test both use

if TEST_STATIC_OBSTACLE:
    _, get_action = load_policy(
        '/home/c2/src/spinningup/data/dd2d-ppo-intersection/dd2d-ppo-intersection_s0',
        use_model_only=False)
    env = gym.make('deepdrive-2d-static-obstacle-no-g-pen-v0')
else:
    p = '/home/c2/src/tmp/spinningup/data/deepdrive-2d-intersection-no-constrained-controls-example/deepdrive-2d-intersection-no-constrained-controls-example_s0_2020_03-10_13-14.50/best_HorizonReturn/2020_03-11_11-36.27'
    if 'no-end-g' in p or 'no-contraint-g' in p or 'no-g' in p or 'no-constrain' in p:
        os.environ['END_ON_HARMFUL_GS'] = '0'
        os.environ['GFORCE_PENALTY_COEFF'] = '0'
        os.environ['JERK_PENALTY_COEFF'] = '0'
    if 'no-constrain' in p:
        os.environ['CONSTRAIN_CONTROLS'] = '0'
    if 'delta-controls' in p or 'deepdrive-2d-intersection-no-g-or-jerk2' in p:
        os.environ['EXPECT_NORMALIZED_ACTION_DELTAS'] = '1'
    else:
        os.environ['EXPECT_NORMALIZED_ACTION_DELTAS'] = '0'
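    # The original snippet ends after the environment variables above are set. What
    # follows is a hedged sketch of how this branch presumably continues, mirroring
    # the TEST_STATIC_OBSTACLE branch; the env id 'deepdrive-2d-intersection-v0' is a
    # placeholder assumption, since the actual id is not shown in the snippet.
    _, get_action = load_policy(p, use_model_only=False)
    env = gym.make('deepdrive-2d-intersection-v0')  # placeholder env id (assumption)

run_policy(env, get_action)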
from spinup.utils.test_policy import load_policy, run_policy

modelpath = '/home/watate/[email protected]/Python/spinningup/data/ex2_ddpg_10ep_mountaincarcontinuous-v0/ex2_ddpg_10ep_mountaincarcontinuous-v0_s0'
max_ep_len = 0
episodes = 100
norender = False
itr = -1
# Only for soft actor-critic
deterministic = False

# This part is unnecessary because load_policy already restores the TF graph
# model = spinup.utils.logx.restore_tf_graph(sess, modelpath)

env, get_action = load_policy(modelpath, itr if itr >= 0 else 'last', deterministic)
# run_policy(env, get_action, max_ep_len, episodes, not norender)

'''
for i_episode in range(20):
    observation = env.reset()
    while True:
        # for t in range(100):
        env.render()
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
    env.close()
'''

for i_episode in range(episodes):
    observation = env.reset()
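    # The original snippet ends mid-loop here. A minimal sketch of how each episode's
    # rollout presumably continues, assuming the standard gym render/step pattern from
    # the commented-out block above; the `done`-based termination is an assumption.
    done = False
    while not done:
        env.render()
        action = get_action(observation)
        observation, reward, done, info = env.step(action)

env.close()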
from spinup.utils.test_policy import load_policy, run_policy
import gym
import gym_foo

_, get_action = load_policy('./output')
env = gym.make('pro-v0')
run_policy(env, get_action)
from spinup.utils.test_policy import load_policy, run_policy
import envs
import gym

policy_name = 'ppo-100e-012-imgclass_as_input'
_, get_action = load_policy('./data/{}/{}_s0'.format(policy_name, policy_name))
env = gym.make('MNISTClassEnv-v0', desired_outputs=[0, 1, 2])
run_policy(env, get_action)
def __init__(self):
    _, self.get_action = load_policy('log2/')
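# A minimal, self-contained sketch of the kind of wrapper class the __init__ fragment
# above presumably belongs to. The class name PolicyAgent and the act() method are
# assumptions for illustration, not part of the original code.
from spinup.utils.test_policy import load_policy


class PolicyAgent:
    def __init__(self):
        # load_policy returns (env, get_action); the env handle is discarded here.
        _, self.get_action = load_policy('log2/')

    def act(self, obs):
        # Query the restored policy for an action given an observation.
        return self.get_action(obs)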
from spinup.utils.test_policy import load_policy, run_policy
import gym
import gym_geofriend2

_, get_action = load_policy('./spinupPpo')
env = gym.make("geofriend2-v0")  # Pyramid(),
run_policy(env, get_action, max_ep_len=200)
from spinup.utils.test_policy import load_policy, run_policy
import rocket_lander_gym
import gym

_, get_action = load_policy('oupput')
env = gym.make('RocketLander-v0')
run_policy(env, get_action)
from spinup.utils.test_policy import load_policy, run_policy
import mycartCont
import sys

dirname = sys.argv[1:]
_, get_action = load_policy(dirname[0])
env = mycartCont.MyCartContEnv()
run_policy(env, get_action)
from spinup.utils.test_policy import load_policy, run_policy _, get_action = load_policy("output_dir2") import numpy as np import train double = train.Doubling() o = np.array([ 0, 2, 0, 0, 0, 3, 0, 3, 0, 0, 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0 ]) a = get_action(o) print(a) #run_policy(double, get_action)
from spinup.utils.test_policy import load_policy, run_policy
import gym
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, required=True)
    parser.add_argument('--save_dir', type=str, required=True)
    parser.add_argument("-d", '--deterministic', action="store_true")
    parser.add_argument("-e", "--episodes", type=int, default=100)
    parser.add_argument("-dr", "--dont_render", action="store_false")
    args = parser.parse_args()

    # store_false means rendering defaults to True and is disabled by passing -dr.
    render_it = args.dont_render

    print(args.save_dir)
    _, get_action = load_policy(args.save_dir, deterministic=args.deterministic)
    env = gym.make(args.env)
    run_policy(env, get_action, num_episodes=args.episodes, render=render_it)
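# Example invocation (the script name, env id, and save directory below are
# hypothetical placeholders, shown only to illustrate the CLI defined above):
#
#   python test_policy_cli.py --env LunarLanderContinuous-v2 \
#       --save_dir data/my_experiment/my_experiment_s0 -d -e 10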