def create_test_env(env_id, n_envs=1, stats_path=None, seed=0, log_dir='',
                    should_render=True, hyperparams=None, env_kwargs=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack)
    :param env_kwargs: (Dict[str, Any]) Optional keyword argument to pass to the env constructor
    :return: (gym.Env)
    """
    # HACK to save logs
    # if log_dir is not None:
    #     os.environ["OPENAI_LOG_FORMAT"] = 'csv'
    #     os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
    #     os.makedirs(log_dir, exist_ok=True)
    #     logger.configure()

    # Guard against the default None (the original crashed on
    # hyperparams.keys() when no dict was supplied).  The dict is still
    # mutated in place on purpose: callers may rely on 'env_wrapper'
    # being removed before they forward hyperparams to the model.
    hyperparams = {} if hyperparams is None else hyperparams

    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)
    # 'env_wrapper' is consumed here and must not reach the model constructor
    hyperparams.pop('env_wrapper', None)

    if n_envs > 1:
        # start_method = 'spawn' for thread safe
        env = SubprocVecEnv([
            make_env(env_id, i, seed, log_dir, wrapper_class=env_wrapper, env_kwargs=env_kwargs)
            for i in range(n_envs)
        ])
    # Pybullet envs does not follow gym.render() interface
    elif "Bullet" in env_id:
        # HACK: force SubprocVecEnv for Bullet env
        env = SubprocVecEnv([
            make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper, env_kwargs=env_kwargs)
        ])
    else:
        env = DummyVecEnv([
            make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper, env_kwargs=env_kwargs)
        ])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        # .get avoids a KeyError when 'normalize' is absent from hyperparams
        if hyperparams.get('normalize', False):
            print("Loading running average")
            print("with params: {}".format(hyperparams['normalize_kwargs']))
            env = VecNormalize(env, training=False, **hyperparams['normalize_kwargs'])

            vecnormalize_path = os.path.join(stats_path, 'vecnormalize.pkl')
            if os.path.exists(vecnormalize_path):
                env = VecNormalize.load(vecnormalize_path, env)
                # Deactivate training and reward normalization
                env.training = False
                env.norm_reward = False
            else:
                # Legacy:
                env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
def create_test_env(
    env_id, n_envs=1, stats_path=None, seed=0, log_dir="", should_render=True, hyperparams=None, env_kwargs=None
):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack)
    :param env_kwargs: (Dict[str, Any]) Optional keyword argument to pass to the env constructor
    :return: (gym.Env)
    """
    # HACK to save logs
    # if log_dir is not None:
    #     os.environ["OPENAI_LOG_FORMAT"] = 'csv'
    #     os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
    #     os.makedirs(log_dir, exist_ok=True)
    #     logger.configure()

    # Guard against the default None so the cleanup loop below cannot crash
    hyperparams = {} if hyperparams is None else hyperparams

    # BUG FIX: read the frame-stack setting BEFORE the cleanup loop below
    # deletes "frame_stack" from hyperparams.  Previously the key was removed
    # first, so the later .get() always returned 0 and frames were never
    # stacked.
    n_stack = hyperparams.get("frame_stack", 0)

    # Clean hyperparams, so the dict can be passed to the model constructor
    keys_to_delete = ["n_envs", "n_timesteps", "env_wrapper", "callback", "frame_stack"]
    for key in keys_to_delete:
        delete_key(hyperparams, key)

    if n_envs > 1:
        # start_method = 'spawn' for thread safe
        env = SubprocVecEnv(
            [make_env(env_id, i, seed, log_dir, env_kwargs=env_kwargs) for i in range(n_envs)]
        )
    else:
        # Single env.  The old "Bullet"/"Walker2D" special case built exactly
        # the same DummyVecEnv as this branch (despite a comment claiming it
        # forced SubprocVecEnv), so the two branches are merged.
        # NOTE(review): rank 127 only offsets the seed, but looks arbitrary -
        # confirm against the training-time seeding convention.
        env = DummyVecEnv([make_env(env_id, 127, seed, log_dir, env_kwargs=env_kwargs)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        # .get avoids a KeyError when 'normalize' is absent from hyperparams
        if hyperparams.get("normalize", False):
            # print("Loading running average")
            # print("with params: {}".format(hyperparams["normalize_kwargs"]))
            path_ = os.path.join(stats_path, "vecnormalize.pkl")
            if os.path.exists(path_):
                env = VecNormalize.load(path_, env)
                # Deactivate training and reward normalization
                env.training = False
                env.norm_reward = False
            else:
                raise ValueError(f"VecNormalize stats {path_} not found")

        if n_stack > 0:
            print(f"Stacking {n_stack} frames")
            env = VecFrameStack(env, n_stack)
    return env
# NOTE(review): fragment - this chunk begins inside the `if` branch of a
# train/test switch whose header lies outside the visible source; the bare
# `else:` below is the evaluation path.  Code is kept token-identical.
    # Training branch (header not visible): train, then persist both the
    # model and the VecNormalize statistics so evaluation can reload them.
    model.learn(total_timesteps=training_timesteps, tb_log_name=tb_log_name, callback=eval_callback)
    model.save(save_model_path)
    env.save(save_vecnormalize_path)
else:
    # Evaluation path: rebuild the robosuite env with on-screen rendering.
    options['has_renderer'] = True
    register_gripper(UltrasoundProbeGripper)
    env_gym = GymWrapper(suite.make(env_id, **options))
    env = DummyVecEnv([lambda : env_gym])

    model = PPO.load(load_model_path)
    env = VecNormalize.load(load_vecnormalize_path, env)
    # Deactivate training updates and reward normalization at test time
    env.training = False
    env.norm_reward = False

    obs = env.reset()
    eprew = 0
    # Enjoy loop: deterministic rollout with per-step reward printout.
    # NOTE(review): no visible reset/eprew clear on `done` - presumably the
    # vec env auto-resets; confirm, otherwise eprew accumulates forever.
    while True:
        action, _states = model.predict(obs, deterministic=True)
        #action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        print(f'reward: {reward}')
        eprew += reward
        env_gym.render()
        if done:
            print(f'eprew: {eprew}')
def _export_policy_weights(model, env):
    """Dump policy parameters and observation statistics as text files.

    Writes one ``./result/<param_name>.txt`` per policy tensor, plus the
    VecNormalize running observation mean/variance.  The policy is moved to
    CPU first so the tensors can be handed to numpy.
    """
    model.policy.to("cpu")
    for name, param in model.policy.state_dict().items():
        weight_file_name = "./result/" + name + ".txt"
        np.savetxt(weight_file_name, param.data)
    np.savetxt("./result/obs_mean.txt", env.obs_rms.mean)
    np.savetxt("./result/obs_variance.txt", env.obs_rms.var)


def main():
    """Train PPO on DYROSTocabi-v1, export weights, then replay the agent."""
    # multiprocess environment
    n_cpu = 8
    env = SubprocVecEnv([lambda: gym.make('DYROSTocabi-v1') for i in range(n_cpu)])
    env = VecNormalize(env, norm_obs=True, clip_obs=2.0, norm_reward=False, training=True)

    # n_cpu = 1
    # env = gym.make('DYROSTocabi-v1')
    # env = DummyVecEnv([lambda: env])
    # env = VecNormalize(env, norm_obs=True, clip_obs=2.0, norm_reward=False, training=True)

    model = PPO('MlpPolicy', env, verbose=1, n_steps=int(4096 / n_cpu), wandb_use=True)
    model.learn(total_timesteps=40000000)

    # NOTE(review): str(datetime.now()) embeds spaces and colons in the file
    # name (invalid on Windows) - consider strftime; left unchanged to keep
    # existing tooling working.
    file_name = "ppo2_DYROSTocabi_" + str(datetime.datetime.now())
    model.save(file_name)
    env.save(file_name + "_env.pkl")
    _export_policy_weights(model, env)

    del model  # remove to demonstrate saving and loading
    del env

    # Bind the raw env to its own name: reusing `env` inside the lambda is a
    # late-binding hazard once `env` is rebound below.
    raw_env = gym.make('DYROSTocabi-v1')
    env = DummyVecEnv([lambda: raw_env])
    env = VecNormalize.load(file_name + "_env.pkl", env)
    env.training = False
    # BUG FIX: reward normalization must be disabled at test time, otherwise
    # the episode rewards printed below are normalized values.  This matches
    # the evaluation setup used everywhere else in this file.
    env.norm_reward = False

    model = PPO.load(file_name, env=env, wandb_use=False)
    _export_policy_weights(model, env)

    # Enjoy trained agent
    obs = np.copy(env.reset())
    epi_reward = 0
    while True:
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = env.step(action)
        env.render()
        epi_reward += rewards
        if dones:
            print("Episode Reward: ", epi_reward)
            epi_reward = 0