def test_vecnormalize(self):
    """
    Tests working of the VecNormalize wrapper
    """
    env = VectorEnv("CartPole-v1", 2)
    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_reward=1.0)
    env.reset()
    _, rewards, _, _ = env.step(env.sample())
    env.close()

    # Normalized rewards should be clipped to [-1.0, 1.0]
    assert np.all(-1.0 <= rewards)
    assert np.all(rewards <= 1.0)
def test_vecenv_parallel(self):
    """
    Tests working of parallel VecEnvs
    """
    env = VectorEnv("CartPole-v1", 2, parallel=True)
    env.seed(0)
    observation_space, action_space = env.get_spaces()
    env.reset()
    env.step(env.sample())
    env.close()
def test_vecmonitor(self):
    """
    Tests working of the VecMonitor wrapper
    """
    env = VectorEnv("CartPole-v1", 2)
    env = VecMonitor(env, history_length=1)
    env.reset()
    env.step(env.sample())

    # Step until the first environment completes an episode, then check
    # that the monitor recorded its statistics. The env is closed only
    # after stepping is done (the original closed it before the loop).
    dones = [0, 0]
    while not dones[0]:
        _, _, dones, infos = env.step(env.sample())
    env.close()

    info = infos[0]["episode"]
    assert info["Episode Rewards"]
    assert info["Episode Length"]
    assert info["Time taken"]
import numpy as np
import optuna
import torch

from genrl.agents.a2c.a2c import A2C
from genrl.environments.suite import VectorEnv
from genrl.trainers.onpolicy import OnPolicyTrainer

env = VectorEnv("CartPole-v0")


def tune_A2C(trial):
    # Define hyperparameters that are relevant for training
    # Choose a suggestion type and range (float/int and log/uniform)
    lr_value = trial.suggest_float("lr_value", 1e-5, 1e-2, log=True)
    lr_policy = trial.suggest_float("lr_policy", 1e-5, 1e-2, log=True)
    rollout_size = trial.suggest_int("rollout_size", 100, 10000, log=True)
    entropy_coeff = trial.suggest_float("entropy_coeff", 5e-4, 2e-1, log=True)

    agent = A2C(
        "mlp",
        env,
        lr_value=lr_value,
        lr_policy=lr_policy,
        rollout_size=rollout_size,
        entropy_coeff=entropy_coeff,
    )
    trainer = OnPolicyTrainer(
        agent,
        env,
        log_interval=10,
    )
    trainer.train()
    # The original snippet is truncated here; the objective should end by
    # returning a scalar score (e.g. mean evaluation reward) for Optuna.
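# The function above only defines the search space and the training run; a
# study still has to be created and optimized. A minimal sketch, assuming
# tune_A2C is completed to return a scalar score to maximize (n_trials is
# an arbitrary choice):
study = optuna.create_study(direction="maximize")
study.optimize(tune_A2C, n_trials=20)

print("Best hyperparameters:", study.best_params)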
import numpy as np
import optuna
import torch

from genrl.agents.td3.td3 import TD3
from genrl.environments.suite import VectorEnv
from genrl.trainers.offpolicy import OffPolicyTrainer

env = VectorEnv("Pendulum-v0")


def objective(trial):
    lr_value = trial.suggest_float("lr_value", 1e-6, 1e-1, log=True)
    lr_policy = trial.suggest_float("lr_policy", 1e-6, 1e-1, log=True)
    # suggest_int requires integer bounds (1e2 and 1e3 are floats)
    replay_size = trial.suggest_int("replay_size", 100, 100000, log=True)
    max_ep_len = trial.suggest_int("max_ep_len", 1000, 50000, log=True)

    agent = TD3(
        "mlp",
        env,
        lr_value=lr_value,
        lr_policy=lr_policy,
        replay_size=replay_size,
    )
    trainer = OffPolicyTrainer(
        agent,
        env,
        log_interval=5,
        epochs=100,
        max_timesteps=16500,
        evaluate_episodes=10,
        max_ep_len=max_ep_len,
    )
    trainer.train()
    # The original snippet ends here; the objective should return a scalar
    # score (e.g. mean evaluation reward) for Optuna to optimize.
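# Same pattern for the TD3 objective, again assuming it is completed to
# return a scalar score to maximize:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

print("Best hyperparameters:", study.best_params)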
def test_vecenv_serial(self):
    """
    Tests working of serial VecEnvs
    """
    env = VectorEnv("CartPole-v1", 2, parallel=False)
    env.seed(0)
    ob, ac = env.observation_spaces, env.action_spaces
    env.reset()
    env.step(env.sample())
    env.close()