def load_old_ppo2(root_dir, env, env_name, index, transparent_params):
    """Load a policy trained with the old (OpenAI) Baselines PPO2 implementation."""
    try:
        from baselines.ppo2 import ppo2 as ppo2_old
    except ImportError as e:
        msg = "{}. HINT: you need to install (OpenAI) Baselines to use old_ppo2".format(e)
        raise ImportError(msg)

    denv = FakeSingleSpacesVec(env, agent_id=index)

    # Look for the checkpoint under any of the known filenames.
    possible_fnames = ["model.pkl", "final_model.pkl"]
    model_path = None
    for fname in possible_fnames:
        candidate_path = os.path.join(root_dir, fname)
        if os.path.exists(candidate_path):
            model_path = candidate_path
    if model_path is None:
        raise FileNotFoundError(
            f"Could not find model at '{root_dir}' under any filename '{possible_fnames}'"
        )

    # Build the policy in its own graph/session so it does not clash with other models.
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with sess.as_default():
        with graph.as_default():
            pylog.info(f"Loading Baselines PPO2 policy from '{model_path}'")
            policy = ppo2_old.learn(
                network="mlp",
                env=denv,
                total_timesteps=1,
                seed=0,
                nminibatches=4,
                log_interval=1,
                save_interval=1,
                load_path=model_path,
            )
    stable_policy = OpenAIToStablePolicy(
        policy, ob_space=denv.observation_space, ac_space=denv.action_space
    )
    model = PolicyToModel(stable_policy)

    try:
        # Wrap with the saved observation/return normalization statistics, if present.
        normalize_path = os.path.join(root_dir, "normalize.pkl")
        with open(normalize_path, "rb") as f:
            old_vec_normalize = pickle.load(f)
        vec_normalize = vec_env.VecNormalize(denv, training=False)
        vec_normalize.obs_rms = old_vec_normalize.ob_rms
        vec_normalize.ret_rms = old_vec_normalize.ret_rms
        model = NormalizeModel(model, vec_normalize)
        pylog.info(f"Loaded normalization statistics from '{normalize_path}'")
    except FileNotFoundError:
        # We did not use VecNormalize during training, skip
        pass

    return model
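# Usage sketch (illustrative only): the directory layout and environment name below are
# assumptions, not part of this module. load_old_ppo2 expects a multi-agent VecEnv plus a
# checkpoint directory containing model.pkl or final_model.pkl and, optionally, the old
# normalize.pkl statistics.
def _example_load_old_ppo2(venv):
    """Load agent 0 of a hypothetical old-Baselines PPO2 run acting in `venv`."""
    return load_old_ppo2(
        root_dir="data/old_ppo2_run",        # hypothetical checkpoint directory
        env=venv,
        env_name="multicomp/SumoHumans-v0",  # hypothetical environment name
        index=0,                             # expose agent 0's observation/action spaces
        transparent_params=None,
    )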
# Loader closure for Stable Baselines policies; `cls` (the policy class) is bound by an
# enclosing scope. This variant delegates normalization loading to load_vec_normalize.
def f(root_dir, env, env_name, index, transparent_params):
    denv = FakeSingleSpacesVec(env, agent_id=index)
    pylog.info(f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'")
    model = load_backward_compatible_model(cls, root_dir, denv)
    try:
        vec_normalize = load_vec_normalize(root_dir, denv)
        model = NormalizeModel(model, vec_normalize)
    except FileNotFoundError:
        # No saved VecNormalize, must have not trained with normalization.
        pass
    return model
# Second loader closure, also closing over `cls`. This variant restores the running
# averages directly via VecNormalize.load_running_average.
def f(root_dir, env, env_name, index, transparent_params):
    denv = FakeSingleSpacesVec(env, agent_id=index)
    pylog.info(f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'")
    model = load_backward_compatible_model(cls, root_dir, denv)
    try:
        vec_normalize = VecNormalize(denv, training=False)
        vec_normalize.load_running_average(root_dir)
        model = NormalizeModel(model, vec_normalize)
        pylog.info(f"Loaded normalization statistics from '{root_dir}'")
    except FileNotFoundError:
        # We did not use VecNormalize during training, skip
        pass
    return model
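# Both `f` closures above read `cls` from an enclosing scope. A minimal sketch of that
# binding pattern (the factory name `_make_loader` is hypothetical, not this module's
# API) shows how a loader gets tied to a specific Stable Baselines algorithm class:
def _make_loader(cls):
    """Bind `cls` and return a loader with the standard (root_dir, env, ...) signature."""

    def loader(root_dir, env, env_name, index, transparent_params):
        denv = FakeSingleSpacesVec(env, agent_id=index)
        pylog.info(f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'")
        return load_backward_compatible_model(cls, root_dir, denv)

    return loader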
def create_multi_agent_curried_policy_wrapper(
    mon_dir,
    env_name,
    num_envs,
    embed_index,
    max_steps,
    state_shape=None,
    add_zoo=False,
    num_zoo=5,
):
    def episode_limit(env):
        return time_limit.TimeLimit(env, max_episode_steps=max_steps)

    def env_fn(i):
        return make_env(env_name, seed=42, i=i, out_dir=mon_dir, pre_wrappers=[episode_limit])

    # Bind i via a default argument to avoid the late-binding closure pitfall
    # (a plain `lambda: env_fn(i)` would make every environment use the final i).
    vec_env = make_dummy_vec_multi_env([lambda i=i: env_fn(i) for i in range(num_envs)])
    zoo = load_policy(
        policy_path="1",
        policy_type="zoo",
        env=vec_env,
        env_name=env_name,
        index=1 - embed_index,
        transparent_params=None,
    )
    half_env = FakeSingleSpacesVec(vec_env, agent_id=embed_index)
    policies = [
        _get_constant_policy(
            half_env,
            constant_value=half_env.action_space.sample(),
            state_shape=state_shape,
        )
        for _ in range(10)
    ]
    if add_zoo:
        policies += [zoo] * num_zoo
    policy_wrapper = MultiPolicyWrapper(policies=policies, num_envs=num_envs)
    vec_env = CurryVecEnv(
        venv=vec_env,
        policy=policy_wrapper,
        agent_idx=embed_index,
        deterministic=False,
    )
    vec_env = FlattenSingletonVecEnv(vec_env)

    # Generator-style setup/teardown: yield the wrapped env and policies, then clean up.
    yield vec_env, policy_wrapper, zoo
    policy_wrapper.close()
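# Usage sketch (an assumption: if the generator above is not already registered as a
# pytest fixture, it composes with contextlib.contextmanager since it yields once and
# then cleans up). The environment name and monitor directory below are placeholders.
import contextlib

_curried_policy_wrapper = contextlib.contextmanager(create_multi_agent_curried_policy_wrapper)

def _example_rollout():
    """Open the wrapper, reset the flattened VecEnv, and let cleanup run on exit."""
    with _curried_policy_wrapper(
        mon_dir="/tmp/mon",                  # hypothetical monitor output directory
        env_name="multicomp/SumoHumans-v0",  # hypothetical multicomp environment
        num_envs=2,
        embed_index=0,
        max_steps=100,
    ) as (venv, policy_wrapper, zoo):
        obs = venv.reset()
        # ...interact with venv here; policy_wrapper controls the embedded opponent,
        # and zoo is the pretrained policy loaded via load_policy.
        return obs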
def load_random(path, env, env_name, index, transparent_params):
    denv = FakeSingleSpacesVec(env, agent_id=index)
    policy = RandomPolicy(denv)
    return PolicyToModel(policy)
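# All loaders above share the (path, env, env_name, index, transparent_params) calling
# convention, so they can be dispatched by policy type. The registry below is only a
# sketch: the dict name, helper name, and keys are assumptions, not this module's API
# (the real dispatch presumably lives behind load_policy).
_EXAMPLE_LOADERS = {
    "old_ppo2": load_old_ppo2,
    "random": load_random,
}

def _example_load(policy_type, path, env, env_name, index, transparent_params=None):
    """Look up a loader by policy type and invoke it with the shared signature."""
    loader = _EXAMPLE_LOADERS[policy_type]
    return loader(path, env, env_name, index, transparent_params)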