import gnn_fwd  # project-local module that defines the GnnFwd policy
from stable_baselines import PPO2  # assumed import; swap in a project PPO2 subclass if one is used
from stable_baselines.common.base_class import BaseRLModel


def load_model(model_name, vec_env):
    """Rebuild a PPO2 model with the GnnFwd policy and restore its saved weights."""
    # load the dictionary of parameters from file
    model_params, params = BaseRLModel._load_from_file(model_name)
    new_model = PPO2(policy=gnn_fwd.GnnFwd,
                     policy_kwargs=model_params['policy_kwargs'],
                     env=vec_env)
    # update new model's parameters
    new_model.load_parameters(params)
    return new_model
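For context, a minimal usage sketch; `make_env` and the checkpoint path are placeholders, not part of the original code:

# Usage sketch -- make_env and 'model.pkl' are hypothetical; wrap the env
# the same way it was wrapped during training before restoring parameters.
from stable_baselines.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([make_env])
model = load_model('model.pkl', vec_env)
obs = vec_env.reset()
action, _states = model.predict(obs, deterministic=True)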
import os
from typing import Optional

import tensorflow as tf
from stable_baselines.common.base_class import BaseRLModel
from stable_baselines.common.vec_env import VecNormalize


def save_stable_model(
    output_dir: str,
    model: BaseRLModel,
    vec_normalize: Optional[VecNormalize] = None,
) -> None:
    """Serialize policy.

    Load later with `load_policy(..., policy_path=output_dir)`.

    Args:
        output_dir: Path to the save directory.
        model: The stable baselines model.
        vec_normalize: Optionally, a VecNormalize to save statistics for.
            `load_policy` automatically applies `NormalizePolicy` wrapper
            when loading.
    """
    os.makedirs(output_dir, exist_ok=True)
    model.save(os.path.join(output_dir, 'model.pkl'))
    if vec_normalize is not None:
        vec_normalize.save_running_average(output_dir)
    tf.logging.info("Saved policy to %s", output_dir)  # TF1.x logging API
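A hedged usage sketch of the saver; the CartPole setup and output path are illustrative only:

# Illustrative only: train briefly on CartPole, then save model + running
# averages together so they can be restored as a pair.
import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

venv = VecNormalize(DummyVecEnv([lambda: gym.make('CartPole-v1')]))
model = PPO2('MlpPolicy', venv).learn(10000)
save_stable_model('output/policy', model, vec_normalize=venv)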
def run(self, model: BaseRLModel, episodes: int):
    """Evaluate a model on its environment for a number of episodes.

    :param model: trained BaseRLModel
    :param episodes: number of evaluation episodes
    """
    print("\n\tEVALUATION\n")
    env = model.get_env()
    env.test = True
    for i in range(episodes):
        # per-episode buffers, pre-filled with one zero per env step
        rewards = [0 for _ in range(env.steps)]
        actions = [0 for _ in range(env.steps)]
        # get the first observation out of the environment
        state = env.reset()
        series = env.timeseries
        series_name = env.print_current_file(False)
        test_stats = env.test_stats
        # play through the env
        while not env.done:
            # _states are only useful when using LSTM policies
            action, _states = model.predict(state)
            state, reward, done, _ = env.step(action)
            # record the action (the env may return it as an array)
            if isinstance(action, np.ndarray):
                actions.append(int(action[0]))
            else:
                actions.append(int(action))
            rewards.append(reward)
        # append to the overall statistics
        self.episodes_rewards.append(sum(rewards))
        self.episodes_actions.append(actions)
        # plot the actions against their series
        plot(series, actions, self.logname + series_name)
        print("Rewards in episode {}: {}".format(i, np.sum(rewards)))
    print("Maximum Reward: ", np.max(self.episodes_rewards),
          "\nAverage Reward: ", np.mean(self.episodes_rewards),
          "\nTest episodes: ", episodes)
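One way this method might be driven, assuming a hypothetical `Evaluation` container that owns `episodes_rewards`, `episodes_actions`, and `logname`; the model path and env factory are placeholders:

# Hypothetical driver; Evaluation, make_custom_env and 'model.pkl' are
# placeholders standing in for the project's own classes and paths.
evaluator = Evaluation(logname='run_01_')
trained = PPO2.load('model.pkl', env=make_custom_env())
evaluator.run(trained, episodes=10)
print('Mean reward over all episodes:', np.mean(evaluator.episodes_rewards))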
from typing import List

from stable_baselines.common.base_class import BaseRLModel


def get_policy_shape(model: BaseRLModel) -> List[int]:
    """Return the number of neurons in each layer of the model's policy network."""
    output = [model.observation_space.shape[0]]
    parameters_dict = model.get_parameters()
    for key in parameters_dict:
        # pick the bias vectors of the policy layers: their length equals the
        # layer width. Skip the log-std parameters, which belong to the action
        # distribution rather than to a layer.
        is_policy_param = key.startswith('model/pi')
        is_logstd = key.startswith('model/pi/logstd')
        is_bias = key.endswith('b:0')
        if is_policy_param and not is_logstd and is_bias:
            output.append(parameters_dict[key].shape[0])
    return output
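A quick sanity check, assuming a stock PPO2 with the built-in MlpPolicy (two 64-unit hidden layers by default); the expected output is an assumption based on those defaults:

from stable_baselines import PPO2

model = PPO2('MlpPolicy', 'CartPole-v1')
# CartPole observations have 4 features; MlpPolicy defaults to two 64-unit
# hidden layers, and the final pi layer maps to the 2 discrete actions.
print(get_policy_shape(model))  # expected: [4, 64, 64, 2]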
import numpy as np

import aoi_learner
from aoi_learner.ppo2 import PPO2
from stable_baselines.common.base_class import BaseRLModel


def test_one(ckpt, test_env, n_episodes=100):
    """Load a checkpoint and evaluate it on test_env."""
    # load the dictionary of parameters from file
    model_params, params = BaseRLModel._load_from_file(ckpt)
    policy_kwargs = model_params['policy_kwargs']
    model = PPO2(policy=aoi_learner.gnn_policy.GNNPolicy,
                 n_steps=10,
                 policy_kwargs=policy_kwargs,
                 env=test_env)
    # update new model's parameters
    model.load_parameters(params)
    print('Testing {} over {} episodes...'.format(ckpt, n_episodes))
    results = eval_model(test_env, model, n_episodes)  # project-local helper
    mean_reward = np.mean(results['reward'])
    std_reward = np.std(results['reward'])
    return mean_reward, std_reward
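A sketch of sweeping multiple checkpoints with `test_one`; the .pkl file names are placeholders:

# Hypothetical checkpoint sweep: evaluate each saved model and keep the
# one with the highest mean reward.
best = None
for ckpt in ['ckpt_0100.pkl', 'ckpt_0200.pkl']:
    mean_r, std_r = test_one(ckpt, test_env, n_episodes=20)
    print('{}: {:.2f} +/- {:.2f}'.format(ckpt, mean_r, std_r))
    if best is None or mean_r > best[1]:
        best = (ckpt, mean_r)
print('Best checkpoint:', best[0])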
if __name__ == '__main__':
    model_name = args.path
    if args.learner or len(model_name) > 0:
        import aoi_learner
        from aoi_learner.ppo2 import PPO2
        from stable_baselines.common.vec_env import DummyVecEnv
        from stable_baselines.common.base_class import BaseRLModel

        vec_env = DummyVecEnv([make_env])
        # load the dictionary of parameters from file
        model_params, params = BaseRLModel._load_from_file(model_name)
        policy_kwargs = model_params['policy_kwargs']
        # rebuild the model around the custom GNN policy, then restore weights
        model = PPO2(policy=aoi_learner.gnn_policy.GNNPolicy,
                     n_steps=10,
                     policy_kwargs=policy_kwargs,
                     env=vec_env)
        # update new model's parameters
        model.load_parameters(params)
        print('Model loaded')
    else:
        model = None
    env = make_env()