Example #1
    def test_vecnormalize(self):
        """
        Tests working of the VecNormalize wrapper
        """
        env = VectorEnv("CartPole-v1", 2)
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_reward=1.0)
        env.reset()
        _, rewards, _, _ = env.step(env.sample())
        env.close()

        assert np.all(-1.0 <= rewards)
        assert np.all(1.0 >= rewards)
Example #2
    def test_vecenv_parallel(self):
        """
        Tests working of parallel VecEnvs
        """
        env = VectorEnv("CartPole-v1", 2, parallel=True)
        env.seed(0)
        observation_space, action_space = env.get_spaces()

        env.reset()
        env.step(env.sample())
        env.close()
Example #3
    def test_vecmonitor(self):
        """
        Tests working of the VecMonitor wrapper
        """
        env = VectorEnv("CartPole-v1", 2)
        env = VecMonitor(env, history_length=1)

        env.reset()
        env.step(env.sample())

        dones = [False, False]
        while not dones[0]:
            _, _, dones, infos = env.step(env.sample())

        env.close()

        info = infos[0]["episode"]

        assert info["Episode Rewards"]
        assert info["Episode Length"]
        assert info["Time taken"]
Example #4
import numpy as np
import optuna
import torch

from genrl.agents.a2c.a2c import A2C
from genrl.environments.suite import VectorEnv
from genrl.trainers.onpolicy import OnPolicyTrainer

env = VectorEnv("CartPole-v0")


def tune_A2C(trial):
    # Define hyperparameters that are relevant for training
    # Choose a suggestion type and range (float/int and log/uniform)
    lr_value = trial.suggest_float("lr_value", 1e-5, 1e-2, log=True)
    lr_policy = trial.suggest_float("lr_policy", 1e-5, 1e-2, log=True)
    rollout_size = trial.suggest_int("rollout_size", 100, 10000, log=True)
    entropy_coeff = trial.suggest_float("entropy_coeff", 5e-4, 2e-1, log=True)

    agent = A2C(
        "mlp",
        env,
        lr_value=lr_value,
        lr_policy=lr_policy,
        rollout_size=rollout_size,
        entropy_coeff=entropy_coeff,
    )
    trainer = OnPolicyTrainer(
        agent,
        env,
        log_interval=10,
        epochs=100,  # assumed values; the original snippet is truncated from here on
        evaluate_episodes=10,
    )
    trainer.train()

    # An Optuna objective must return a scalar to optimize; assuming
    # trainer.evaluate() reports per-episode rewards, their mean is returned
    # as the value to maximize
    episode_reward = np.mean(trainer.evaluate())

    return episode_reward
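

# The snippet above defines the objective but never launches the search. A
# minimal sketch of driving it with an Optuna study follows; the study name,
# storage URL and trial count are illustrative, not taken from the original.
study = optuna.create_study(
    study_name="A2C-CartPole-v0",
    direction="maximize",  # higher mean episode reward is better
    storage="sqlite:///A2C-CartPole-v0.db",  # optional: persist trials to disk
    load_if_exists=True,
)
study.optimize(tune_A2C, n_trials=20)

print("Best value:", study.best_value)
print("Best hyperparameters:", study.best_params)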
Example #5
import numpy as np
import optuna
import torch

from genrl.agents.td3.td3 import TD3
from genrl.environments.suite import VectorEnv
from genrl.trainers.offpolicy import OffPolicyTrainer

env = VectorEnv("Pendulum-v0")


def objective(trial):
    lr_value = trial.suggest_float("lr_value", 1e-6, 1e-1, log=True)
    lr_policy = trial.suggest_float("lr_policy", 1e-6, 1e-1, log=True)
    replay_size = trial.suggest_int("replay_size", 100, 100000, log=True)
    max_ep_len = trial.suggest_int("max_ep_len", 1000, 50000, log=True)

    agent = TD3(
        "mlp",
        env,
        lr_value=lr_value,
        lr_policy=lr_policy,
        replay_size=replay_size,
    )
    trainer = OffPolicyTrainer(
        agent,
        env,
        log_interval=5,
        epochs=100,
        max_timesteps=16500,
        evaluate_episodes=10,
        max_ep_len=max_ep_len,
    )
    trainer.train()

    # The original snippet ends at the trainer construction; an Optuna
    # objective must return a scalar, so the mean evaluation reward is
    # assumed to be the returned value
    episode_reward = np.mean(trainer.evaluate())

    return episode_reward
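

# Once the search has run, the best hyperparameters can be read back from the
# study and used to configure a final agent. A minimal sketch; the trial count
# and the retraining step below are illustrative, not from the original.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

best = study.best_params
final_agent = TD3(
    "mlp",
    env,
    lr_value=best["lr_value"],
    lr_policy=best["lr_policy"],
    replay_size=best["replay_size"],
)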
Example #6
    def test_vecenv_serial(self):
        """
        Tests working of serial VecEnvs
        """
        env = VectorEnv("CartPole-v1", 2, parallel=False)
        env.seed(0)
        ob, ac = env.observation_spaces, env.action_spaces

        env.reset()
        env.step(env.sample())
        env.close()