import gnn_fwd  # project-local module that defines the GnnFwd policy
from stable_baselines import PPO2  # assumed import; swap in a project PPO2 subclass if one is used
from stable_baselines.common.base_class import BaseRLModel


def load_model(model_name, vec_env):
    """Rebuild a PPO2 model with the GnnFwd policy and restore its saved weights."""
    # load the dictionary of parameters from file
    model_params, params = BaseRLModel._load_from_file(model_name)
    new_model = PPO2(policy=gnn_fwd.GnnFwd,
                     policy_kwargs=model_params['policy_kwargs'],
                     env=vec_env)
    # update new model's parameters
    new_model.load_parameters(params)
    return new_model
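For context, a minimal usage sketch; `make_env` and the checkpoint path are placeholders, not part of the original code:

# Usage sketch -- make_env and 'model.pkl' are hypothetical; wrap the env
# the same way it was wrapped during training before restoring parameters.
from stable_baselines.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([make_env])
model = load_model('model.pkl', vec_env)
obs = vec_env.reset()
action, _states = model.predict(obs, deterministic=True)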
import os
from typing import Optional

import tensorflow as tf
from stable_baselines.common.base_class import BaseRLModel
from stable_baselines.common.vec_env import VecNormalize


def save_stable_model(
    output_dir: str,
    model: BaseRLModel,
    vec_normalize: Optional[VecNormalize] = None,
) -> None:
    """Serialize policy.

    Load later with `load_policy(..., policy_path=output_dir)`.

    Args:
        output_dir: Path to the save directory.
        model: The stable baselines model.
        vec_normalize: Optionally, a VecNormalize to save statistics for.
            `load_policy` automatically applies `NormalizePolicy` wrapper
            when loading.
    """
    os.makedirs(output_dir, exist_ok=True)
    model.save(os.path.join(output_dir, 'model.pkl'))
    if vec_normalize is not None:
        vec_normalize.save_running_average(output_dir)
    tf.logging.info("Saved policy to %s", output_dir)  # TF1.x logging API
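A hedged usage sketch of the saver; the CartPole setup and output path are illustrative only:

# Illustrative only: train briefly on CartPole, then save model + running
# averages together so they can be restored as a pair.
import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

venv = VecNormalize(DummyVecEnv([lambda: gym.make('CartPole-v1')]))
model = PPO2('MlpPolicy', venv).learn(10000)
save_stable_model('output/policy', model, vec_normalize=venv)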
def run(self, model: BaseRLModel, episodes: int):
    """Evaluate a model on its environment for a number of episodes.

    :param model: trained BaseRLModel
    :param episodes: number of evaluation episodes
    """
    print("\n\tEVALUATION\n")
    env = model.get_env()
    env.test = True
    for i in range(episodes):
        # per-episode buffers, pre-filled with one zero per env step
        rewards = [0 for _ in range(env.steps)]
        actions = [0 for _ in range(env.steps)]
        # get the first observation out of the environment
        state = env.reset()
        series = env.timeseries
        series_name = env.print_current_file(False)
        test_stats = env.test_stats
        # play through the env
        while not env.done:
            # _states are only useful when using LSTM policies
            action, _states = model.predict(state)
            state, reward, done, _ = env.step(action)
            # record the action (the env may return it as an array)
            if isinstance(action, np.ndarray):
                actions.append(int(action[0]))
            else:
                actions.append(int(action))
            rewards.append(reward)
        # append to the overall statistics
        self.episodes_rewards.append(sum(rewards))
        self.episodes_actions.append(actions)
        # plot the actions against their series
        plot(series, actions, self.logname + series_name)
        print("Rewards in episode {}: {}".format(i, np.sum(rewards)))
    print("Maximum Reward: ", np.max(self.episodes_rewards),
          "\nAverage Reward: ", np.mean(self.episodes_rewards),
          "\nTest episodes: ", episodes)
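One way this method might be driven, assuming a hypothetical `Evaluation` container that owns `episodes_rewards`, `episodes_actions`, and `logname`; the model path and env factory are placeholders:

# Hypothetical driver; Evaluation, make_custom_env and 'model.pkl' are
# placeholders standing in for the project's own classes and paths.
evaluator = Evaluation(logname='run_01_')
trained = PPO2.load('model.pkl', env=make_custom_env())
evaluator.run(trained, episodes=10)
print('Mean reward over all episodes:', np.mean(evaluator.episodes_rewards))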
from typing import List

from stable_baselines.common.base_class import BaseRLModel


def get_policy_shape(model: BaseRLModel) -> List[int]:
    """Return the number of neurons in each layer of the model's policy network."""
    output = [model.observation_space.shape[0]]
    parameters_dict = model.get_parameters()
    for key in parameters_dict:
        # pick the bias vectors of the policy layers: their length equals the
        # layer width. Skip the log-std parameters, which belong to the action
        # distribution rather than to a layer.
        is_policy_param = key.startswith('model/pi')
        is_logstd = key.startswith('model/pi/logstd')
        is_bias = key.endswith('b:0')
        if is_policy_param and not is_logstd and is_bias:
            output.append(parameters_dict[key].shape[0])
    return output
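A quick sanity check, assuming a stock PPO2 with the built-in MlpPolicy (two 64-unit hidden layers by default); the expected output is an assumption based on those defaults:

from stable_baselines import PPO2

model = PPO2('MlpPolicy', 'CartPole-v1')
# CartPole observations have 4 features; MlpPolicy defaults to two 64-unit
# hidden layers, and the final pi layer maps to the 2 discrete actions.
print(get_policy_shape(model))  # expected: [4, 64, 64, 2]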
import numpy as np

import aoi_learner
from aoi_learner.ppo2 import PPO2
from stable_baselines.common.base_class import BaseRLModel


def test_one(ckpt, test_env, n_episodes=100):
    """Load a checkpoint and evaluate it on test_env."""
    # load the dictionary of parameters from file
    model_params, params = BaseRLModel._load_from_file(ckpt)
    policy_kwargs = model_params['policy_kwargs']
    model = PPO2(policy=aoi_learner.gnn_policy.GNNPolicy,
                 n_steps=10,
                 policy_kwargs=policy_kwargs,
                 env=test_env)
    # update new model's parameters
    model.load_parameters(params)
    print('Testing {} over {} episodes...'.format(ckpt, n_episodes))
    results = eval_model(test_env, model, n_episodes)  # project-local helper
    mean_reward = np.mean(results['reward'])
    std_reward = np.std(results['reward'])
    return mean_reward, std_reward
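A sketch of sweeping multiple checkpoints with `test_one`; the .pkl file names are placeholders:

# Hypothetical checkpoint sweep: evaluate each saved model and keep the
# one with the highest mean reward.
best = None
for ckpt in ['ckpt_0100.pkl', 'ckpt_0200.pkl']:
    mean_r, std_r = test_one(ckpt, test_env, n_episodes=20)
    print('{}: {:.2f} +/- {:.2f}'.format(ckpt, mean_r, std_r))
    if best is None or mean_r > best[1]:
        best = (ckpt, mean_r)
print('Best checkpoint:', best[0])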
if __name__ == '__main__':
    model_name = args.path
    if args.learner or len(model_name) > 0:
        import aoi_learner
        from aoi_learner.ppo2 import PPO2
        from stable_baselines.common.vec_env import DummyVecEnv
        from stable_baselines.common.base_class import BaseRLModel

        vec_env = DummyVecEnv([make_env])
        # load the dictionary of parameters from file
        model_params, params = BaseRLModel._load_from_file(model_name)
        policy_kwargs = model_params['policy_kwargs']
        # rebuild the model around the custom GNN policy, then restore weights
        model = PPO2(policy=aoi_learner.gnn_policy.GNNPolicy,
                     n_steps=10,
                     policy_kwargs=policy_kwargs,
                     env=vec_env)
        # update new model's parameters
        model.load_parameters(params)
        print('Model loaded')
    else:
        model = None
    env = make_env()