Example #1
import gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed


def multiprocessing_example():
    # Multiprocessing: Unleashing the Power of Vectorized Environments

    def make_env(env_id, rank, seed=0):
        """
        Utility function for a multiprocessed env.

        :param env_id: (str) the environment ID.
        :param rank: (int) index of the subprocess.
        :param seed: (int) the initial seed for the RNG.
        """
        def _init():
            env = gym.make(env_id)
            env.seed(seed + rank)
            return env

        set_random_seed(seed)
        return _init

    env_id = "CartPole-v1"
    num_cpu = 4  # Number of processes to use.
    # Create the vectorized environment.
    env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])

    # Stable Baselines3 also provides a make_vec_env() helper that does exactly the previous steps for you.
    # You can choose between 'DummyVecEnv' (usually faster) and 'SubprocVecEnv'; see the sketch after this example.
    # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)

    model = PPO("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=25_000)

    obs = env.reset()
    for _ in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
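The make_vec_env() helper referenced in the comment above collapses the manual make_env/SubprocVecEnv setup into a single call. A minimal sketch of the equivalent setup, assuming the same CartPole-v1 task and four workers:

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv

if __name__ == "__main__":
    # make_vec_env creates n_envs copies of the environment, seeds each one,
    # and wraps them in the requested VecEnv class.
    env = make_vec_env("CartPole-v1", n_envs=4, seed=0, vec_env_cls=SubprocVecEnv)

    model = PPO("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=25_000)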
Example #2
import gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed


def make_env(env_id, rank, seed=0):
    # Return a thunk that creates and seeds a single environment instance.
    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env

    set_random_seed(seed)
    return _init


# SubprocVecEnv starts worker processes, so the entry point must be protected
# by an `if __name__ == '__main__':` guard (required with the spawn/forkserver
# start methods, e.g. on Windows and macOS).
if __name__ == '__main__':
    env_id = "CartPole-v1"
    num_cpu = 4  # Number of processes to use
    env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])

    model = PPO('MlpPolicy', env, verbose=1)
    model.learn(total_timesteps=25000)

    obs = env.reset()
    for _ in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
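    # --- Optional: instead of the manual render loop above, SB3's
    # evaluate_policy helper reports the mean episode reward directly.
    # A minimal sketch, assuming it stays inside this __main__ block with
    # `model` and `env` still in scope. ---
    from stable_baselines3.common.evaluation import evaluate_policy

    # Average return over 10 evaluation episodes.
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
    print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")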
Example #3
import datetime

import gym
import numpy as np

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize


def main():
    # Multiprocessed environment ('DYROSTocabi-v1' must be registered with gym beforehand).
    n_cpu = 8
    env = SubprocVecEnv(
        [lambda: gym.make('DYROSTocabi-v1') for _ in range(n_cpu)])
    env = VecNormalize(env,
                       norm_obs=True,
                       clip_obs=2.0,
                       norm_reward=False,
                       training=True)

    # n_cpu = 1
    # env = gym.make('DYROSTocabi-v1')
    # env = DummyVecEnv([lambda: env])
    # env = VecNormalize(env, norm_obs=True, clip_obs=2.0, norm_reward=False, training=True)

    model = PPO('MlpPolicy',
                env,
                verbose=1,
                n_steps=int(4096 / n_cpu),
                wandb_use=True)  # Note: wandb_use is not a stock SB3 PPO argument; it comes from a customized build.
    model.learn(total_timesteps=40000000)

    file_name = "ppo2_DYROSTocabi_" + str(datetime.datetime.now())
    model.save(file_name)
    env.save(file_name + "_env.pkl")

    model.policy.to("cpu")
    for name, param in model.policy.state_dict().items():
        weight_file_name = "./result/" + name + ".txt"
        np.savetxt(weight_file_name, param.data)

    np.savetxt("./result/obs_mean.txt", env.obs_rms.mean)
    np.savetxt("./result/obs_variance.txt", env.obs_rms.var)

    del model  # remove to demonstrate saving and loading
    del env

    # file_name = "ppo2_DYROSTocabi_2021-02-27 02:20:20.015346"

    env = gym.make('DYROSTocabi-v1')
    env = DummyVecEnv([lambda: env])
    env = VecNormalize.load(file_name + "_env.pkl", env)
    env.training = False  # Freeze the normalization statistics at test time.

    model = PPO.load(file_name, env=env, wandb_use=False)

    model.policy.to("cpu")
    for name, param in model.policy.state_dict().items():
        weight_file_name = "./result/" + name + ".txt"
        np.savetxt(weight_file_name, param.data)

    np.savetxt("./result/obs_mean.txt", env.obs_rms.mean)
    np.savetxt("./result/obs_variance.txt", env.obs_rms.var)

    # Enjoy the trained agent.
    obs = np.copy(env.reset())
    epi_reward = 0

    while True:
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = env.step(action)
        env.render()
        epi_reward += rewards

        if dones:
            print("Episode Reward: ", epi_reward)
            epi_reward = 0
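
The obs_mean / obs_variance files written above contain everything needed to reproduce VecNormalize's observation scaling outside of SB3 (for example in a standalone controller). A minimal sketch of that normalization, assuming the clip_obs=2.0 setting used above and VecNormalize's default epsilon of 1e-8; the normalize_obs helper is only for illustration:

import numpy as np

obs_mean = np.loadtxt("./result/obs_mean.txt")
obs_var = np.loadtxt("./result/obs_variance.txt")

def normalize_obs(raw_obs, clip_obs=2.0, epsilon=1e-8):
    # Same transform VecNormalize applies: standardize with the running
    # statistics, then clip to [-clip_obs, clip_obs].
    return np.clip((raw_obs - obs_mean) / np.sqrt(obs_var + epsilon),
                   -clip_obs, clip_obs)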