model.learn(total_timesteps=training_timesteps, tb_log_name=tb_log_name, callback=eval_callback) model.save(save_model_path) env.save(save_vecnormalize_path) else: options['has_renderer'] = True register_gripper(UltrasoundProbeGripper) env_gym = GymWrapper(suite.make(env_id, **options)) env = DummyVecEnv([lambda : env_gym]) model = PPO.load(load_model_path) env = VecNormalize.load(load_vecnormalize_path, env) env.training = False env.norm_reward = False obs = env.reset() eprew = 0 while True: action, _states = model.predict(obs, deterministic=True) #action = env.action_space.sample() obs, reward, done, info = env.step(action) print(f'reward: {reward}') eprew += reward env_gym.render() if done: print(f'eprew: {eprew}') obs = env.reset()
def create_test_env(env_id, n_envs=1, stats_path=None, seed=0, log_dir='', should_render=True, hyperparams=None, env_kwargs=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack)
    :param env_kwargs: (Dict[str, Any]) Optional keyword argument to pass to the env constructor
    :return: (gym.Env)
    """
    # HACK to save logs
    # if log_dir is not None:
    #     os.environ["OPENAI_LOG_FORMAT"] = 'csv'
    #     os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
    #     os.makedirs(log_dir, exist_ok=True)
    #     logger.configure()

    # Bug fix: the default is hyperparams=None, so dict operations below
    # crashed with TypeError unless the caller always supplied a dict.
    hyperparams = {} if hyperparams is None else hyperparams

    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)
    # Remove the wrapper spec so hyperparams can be forwarded elsewhere;
    # pop() with a default replaces the `in ... keys()` check + del.
    hyperparams.pop('env_wrapper', None)

    if n_envs > 1:
        # start_method = 'spawn' for thread safe
        env = SubprocVecEnv([
            make_env(env_id, i, seed, log_dir, wrapper_class=env_wrapper, env_kwargs=env_kwargs)
            for i in range(n_envs)
        ])
    # Pybullet envs does not follow gym.render() interface
    elif "Bullet" in env_id:
        # HACK: force SubprocVecEnv for Bullet env
        env = SubprocVecEnv([
            make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper, env_kwargs=env_kwargs)
        ])
    else:
        env = DummyVecEnv([
            make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper, env_kwargs=env_kwargs)
        ])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        # Bug fix: hyperparams['normalize'] raised KeyError when absent;
        # .get() treats a missing key as "no normalization".
        if hyperparams.get('normalize', False):
            print("Loading running average")
            print("with params: {}".format(hyperparams.get('normalize_kwargs', {})))
            env = VecNormalize(env, training=False, **hyperparams.get('normalize_kwargs', {}))
            if os.path.exists(os.path.join(stats_path, 'vecnormalize.pkl')):
                env = VecNormalize.load(os.path.join(stats_path, 'vecnormalize.pkl'), env)
                # Deactivate training and reward normalization
                env.training = False
                env.norm_reward = False
            else:
                # Legacy:
                # env.load_running_average(stats_path)
                pass

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
def create_test_env(
    env_id, n_envs=1, stats_path=None, seed=0, log_dir="", should_render=True, hyperparams=None, env_kwargs=None
):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack)
    :param env_kwargs: (Dict[str, Any]) Optional keyword argument to pass to the env constructor
    :return: (gym.Env)
    """
    # HACK to save logs
    # if log_dir is not None:
    #     os.environ["OPENAI_LOG_FORMAT"] = 'csv'
    #     os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
    #     os.makedirs(log_dir, exist_ok=True)
    #     logger.configure()

    # Bug fix: the default is hyperparams=None, so delete_key / dict access
    # below crashed unless the caller always supplied a dict.
    hyperparams = {} if hyperparams is None else hyperparams

    # Bug fix: "frame_stack" must be read BEFORE the cleanup loop deletes it;
    # previously n_stack was always 0 and frame stacking never happened.
    n_stack = hyperparams.get("frame_stack", 0)

    # Clean hyperparams, so the dict can be pass to the model constructor
    for key in ("n_envs", "n_timesteps", "env_wrapper", "callback", "frame_stack"):
        delete_key(hyperparams, key)

    if n_envs > 1:
        # start_method = 'spawn' for thread safe
        env = SubprocVecEnv(
            [make_env(env_id, i, seed, log_dir, env_kwargs=env_kwargs) for i in range(n_envs)]
        )
    else:
        # NOTE(review): the former `"Bullet" in env_id or "Walker2D" in env_id`
        # branch was byte-identical to this one, so the branches were merged.
        # The rank 127 (instead of the conventional 0) offsets the env seed;
        # kept as-is to preserve behavior — confirm it is intentional.
        env = DummyVecEnv([make_env(env_id, 127, seed, log_dir, env_kwargs=env_kwargs)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        # Bug fix: hyperparams["normalize"] raised KeyError when absent;
        # .get() treats a missing key as "no normalization".
        if hyperparams.get("normalize", False):
            # print("Loading running average")
            # print("with params: {}".format(hyperparams["normalize_kwargs"]))
            path_ = os.path.join(stats_path, "vecnormalize.pkl")
            if os.path.exists(path_):
                env = VecNormalize.load(path_, env)
                # Deactivate training and reward normalization
                env.training = False
                env.norm_reward = False
            else:
                raise ValueError(f"VecNormalize stats {path_} not found")

        if n_stack > 0:
            print(f"Stacking {n_stack} frames")
            env = VecFrameStack(env, n_stack)
    return env