Example #1
def test_env_seeding(env_name):
    # The same global seed must reproduce the same environment trajectory;
    # a different seed must produce a different one.
    seeding.set_global_seed(123)
    env1 = gym_make(env_name)

    seeding.set_global_seed(456)
    env2 = gym_make(env_name)

    seeding.set_global_seed(123)
    env3 = gym_make(env_name)

    if deepcopy(env1).is_online():
        traj1 = get_env_trajectory(env1, 500)
        traj2 = get_env_trajectory(env2, 500)
        traj3 = get_env_trajectory(env3, 500)

        assert not compare_trajectories(traj1, traj2)
        assert compare_trajectories(traj1, traj3)
Example #2
def test_env_seeding(env_name):
    # Same check as above, but each environment is seeded from its own Seeder.
    seeder1 = Seeder(123)
    env1 = gym_make(env_name)
    env1.reseed(seeder1)

    seeder2 = Seeder(456)
    env2 = gym_make(env_name)
    env2.reseed(seeder2)

    seeder3 = Seeder(123)
    env3 = gym_make(env_name)
    env3.reseed(seeder3)

    if deepcopy(env1).is_online():
        traj1 = get_env_trajectory(env1, 500)
        traj2 = get_env_trajectory(env2, 500)
        traj3 = get_env_trajectory(env3, 500)

        assert not compare_trajectories(traj1, traj2)
        assert compare_trajectories(traj1, traj3)
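# The helpers get_env_trajectory and compare_trajectories are used above but
# never defined in these snippets. A minimal, hypothetical sketch of what
# they might do (names and behavior are assumptions, not rlberry's actual
# implementation):
import numpy as np

def get_env_trajectory(env, horizon):
    # Roll out `horizon` random-action steps and record the observations.
    observations = []
    obs = env.reset()
    for _ in range(horizon):
        obs, _, done, _ = env.step(env.action_space.sample())
        observations.append(obs)
        if done:
            obs = env.reset()
    return observations

def compare_trajectories(traj1, traj2):
    # Two trajectories are equal if every recorded observation matches.
    if len(traj1) != len(traj2):
        return False
    return all(np.array_equal(o1, o2) for o1, o2 in zip(traj1, traj2))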
Example #3
def test_dqn_agent(use_double_dqn, use_prioritized_replay):
    env = gym_make("CartPole-v0")
    agent = DQNAgent(
        env,
        learning_starts=5,
        eval_interval=75,
        train_interval=2,
        gradient_steps=-1,
        use_double_dqn=use_double_dqn,
        use_prioritized_replay=use_prioritized_replay,
    )
    agent.fit(budget=500)
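# The two boolean arguments suggest the test above is run for every DQN
# variant; a plausible (assumed, not shown in the source) pytest setup would
# decorate it like this:
import pytest

@pytest.mark.parametrize("use_double_dqn", [True, False])
@pytest.mark.parametrize("use_prioritized_replay", [True, False])
def test_dqn_agent(use_double_dqn, use_prioritized_replay):
    ...  # body as above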
Example #4
def test_dqn_agent():
    env = gym_make("CartPole-v0")
    params = {"n_episodes": 10}
    agent = DQNAgent(env, **params)
    agent.fit()
    agent.policy(env.observation_space.sample())
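# A natural extra check (not in the original snippet): the returned action
# should be a valid element of the environment's action space.
action = agent.policy(env.observation_space.sample())
assert env.action_space.contains(action)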
Example #5
from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent
from rlberry.utils.logging import configure_logging
from rlberry.wrappers import RescaleRewardWrapper

configure_logging("DEBUG")

env = gym_make('Acrobot-v1')
env.reward_range = (-1.0, 0.0)  # missing in gym implementation

# rescale rewards to [0, 1]
env = RescaleRewardWrapper(env, (0.0, 1.0))
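# The wrapper presumably applies the affine map from env.reward_range to the
# target interval, r_scaled = (r - r_min) / (r_max - r_min); with
# reward_range = (-1.0, 0.0), a reward of -1 maps to 0 and a reward of 0 maps to 1.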

agent = RSUCBVIAgent(env, n_episodes=10, gamma=0.99, horizon=200,
                     bonus_scale_factor=0.1, min_dist=0.2)
agent.fit()

state = env.reset()
for tt in range(200):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state
    env.render()
env.close()
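Example #6
# The snippet below starts inside a class: the two methods belong to an agent
# that wraps a stable-baselines model (`self.wrapped`), but the class header
# and constructor are not part of the source. What follows is a minimal,
# hypothetical reconstruction of that missing part (the class name, the lack
# of a base class, and the constructor signature are assumptions inferred
# from the usage further down):
from stable_baselines3 import A2C
from rlberry.envs import gym_make

class A2CAgent:
    """Hypothetical wrapper exposing a stable-baselines3 A2C model
    through fit()/policy() methods (defined below)."""

    def __init__(self, env, policy, **sb3_kwargs):
        # keep the wrapped stable-baselines model used by fit() and policy()
        self.wrapped = A2C(policy, env, **sb3_kwargs)
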
    def fit(self, **kwargs):
        # delegate training to the wrapped stable-baselines model
        result = self.wrapped.learn(**kwargs)
        info = {}  # possibly store something from result
        return info

    def policy(self, observation, **kwargs):
        # delegate action selection to the wrapped model
        action, _state = self.wrapped.predict(observation, **kwargs)
        return action


#
# Training one agent
#

env = gym_make('CartPole-v1')
agent = A2CAgent(env, 'MlpPolicy', verbose=1)
agent.fit(total_timesteps=1000)

obs = env.reset()
for i in range(1000):
    action = agent.policy(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        break
env.close()

#
# Training several agents and comparing different hyperparameters
#
Example #7
from rlberry.envs import gym_make
from pathlib import Path
from torch.utils.tensorboard import SummaryWriter

from rlberry.agents.dqn import DQNAgent
from rlberry.utils.logging import configure_logging

configure_logging(level="DEBUG")

env = gym_make("CartPole-v0")
agent = DQNAgent(env, n_episodes=50, exploration_kwargs={"tau": 1000})
agent.set_writer(SummaryWriter())

print(f"Running DQN on {env}")
print(f"Visualize with tensorboard by \
running:\n$tensorboard --logdir {Path(agent.writer.log_dir).parent}")

agent.fit()

for episode in range(3):
    done = False
    state = env.reset()
    while not done:
        action = agent.policy(state)
        state, reward, done, _ = env.step(action)
        env.render()
env.close()
Example #8
from rlberry.seeding import Seeder

seeder = Seeder(123)

# Each Seeder instance has a random number generator (rng)
# See https://numpy.org/doc/stable/reference/random/generator.html to check the
# methods available in rng.
seeder.rng.integers(5)
seeder.rng.normal()
print(type(seeder.rng))
# etc

# Environments and agents should be seeded using a single seeder,
# to ensure that their random number generators are independent.
from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent

env = gym_make("MountainCar-v0")
env.reseed(seeder)

agent = RSUCBVIAgent(env)
agent.reseed(seeder)

# Environments and Agents have their own seeder and rng.
# When writing your own agents and inheriting from the Agent class,
# you should use agent.rng whenever you need to generate random numbers;
# the same applies to your environments.
# This is necessary to ensure reproducibility.
print("env seeder: ", env.seeder)
print("random sample from env rng: ", env.rng.normal())
print("agent seeder: ", agent.seeder)
print("random sample from agent rng: ", agent.rng.normal())