Example #1
from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.wrappers.vis2d import Vis2dWrapper


def test_vis2dwrapper():
    env = MountainCar()
    env = Vis2dWrapper(env, n_bins_obs=20, memory_size=200)

    agent = RSUCBVIAgent(
        env,
        gamma=0.99,
        horizon=200,
        bonus_scale_factor=0.1,
        copy_env=False,
        min_dist=0.1,
    )

    agent.fit(budget=15)
    env.plot_trajectories(show=False)
    env.plot_trajectory_actions(show=False)
Example #2
from rlberry.envs import Acrobot
from rlberry.agents import RSUCBVIAgent
from rlberry.utils.logging import configure_logging
from rlberry.wrappers import RescaleRewardWrapper

configure_logging("DEBUG")

env = Acrobot()
# rescale rewards to [0, 1]
env = RescaleRewardWrapper(env, (0.0, 1.0))

agent = RSUCBVIAgent(env,
                     n_episodes=500,
                     gamma=0.99,
                     horizon=300,
                     bonus_scale_factor=0.01,
                     min_dist=0.25)
agent.fit()

env.enable_rendering()
state = env.reset()
for tt in range(4 * agent.horizon):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

env.save_video("acrobot.mp4")
Example #3
from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar
from rlberry.envs.benchmarks.ball_exploration import PBall2D
from rlberry.utils.logging import configure_logging

configure_logging("DEBUG")

for env, horizon in zip([MountainCar(), PBall2D()], [170, 50]):
    print("Running RS-UCBVI on %s" % env.name)
    agent = RSUCBVIAgent(env,
                         n_episodes=1000,
                         gamma=0.99,
                         horizon=horizon,
                         bonus_scale_factor=0.1)
    agent.fit()

    env.enable_rendering()
    state = env.reset()
    for tt in range(200):
        action = agent.policy(state)
        next_state, reward, done, _ = env.step(action)
        state = next_state

    env.render()
Example #4
""" 
 ===================== 
 Demo: demo_gym_wrapper 
 =====================
"""
from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent
from rlberry.wrappers import RescaleRewardWrapper

env = gym_make("Acrobot-v1")
env.reward_range = (-1.0, 0.0)  # missing in gym implementation

# rescale rewards to [0, 1]
env = RescaleRewardWrapper(env, (0.0, 1.0))

agent = RSUCBVIAgent(env,
                     gamma=0.99,
                     horizon=200,
                     bonus_scale_factor=0.1,
                     min_dist=0.2)
agent.fit(budget=10)

state = env.reset()
for tt in range(200):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state
    env.render()
env.close()
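
# For intuition, the rescaling applied above is (conceptually) an affine map
# from the environment's reward_range to the target interval. The helper below
# is a hypothetical sketch for bounded ranges, not RescaleRewardWrapper's
# actual implementation.
def rescale_reward_sketch(reward, old_range=(-1.0, 0.0), new_range=(0.0, 1.0)):
    old_lo, old_hi = old_range
    new_lo, new_hi = new_range
    # map old_lo -> new_lo and old_hi -> new_hi linearly
    return new_lo + (reward - old_lo) * (new_hi - new_lo) / (old_hi - old_lo)


# e.g. rescale_reward_sketch(-1.0) == 0.0 and rescale_reward_sketch(-0.5) == 0.5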
Example #5
                       n_bins_obs=20,
                       memory_size=100,
                       state_preprocess_fn=get_nroom_state_coord)
    agent = ValueIterationAgent(env.unwrapped,
                                gamma=0.99,
                                horizon=200,
                                copy_env=False)

else:
    env = MountainCar()
    env = Vis2dWrapper(env, n_bins_obs=20, memory_size=200)

    agent = RSUCBVIAgent(
        env,
        gamma=0.99,
        horizon=200,
        bonus_scale_factor=0.1,
        copy_env=False,
        min_dist=0.1,
    )

agent.fit(budget=100)

env.enable_rendering()
for ep in range(3):
    state = env.reset()
    for tt in range(agent.horizon):
        action = agent.policy(state)
        next_s, _, _, _ = env.step(action)
        state = next_s

try:
Example #6
"""
==================================================
A demo of RSUCBVI algorithm in MountainCar environment
==================================================
 Illustration of how to set up an RSUCBVI algorithm in rlberry.
 The environment chosen here is MountainCar environment.

.. video:: ../../video_plot_rsucbvi.mp4
   :width: 600

"""
# sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_rsucbvi.jpg'

from rlberry.agents import RSUCBVIAgent
from rlberry.envs.classic_control import MountainCar

env = MountainCar()
horizon = 170
print("Running RS-UCBVI on %s" % env.name)
agent = RSUCBVIAgent(env, gamma=0.99, horizon=horizon, bonus_scale_factor=0.1)
agent.fit(budget=500)

env.enable_rendering()
state = env.reset()
for tt in range(200):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

video = env.save_video("_video/video_plot_rsucbvi.mp4")
Example #7
# Create a Seeder from rlberry's seeding utilities; it exposes a numpy rng.
from rlberry.seeding import Seeder

seeder = Seeder(123)  # any integer seed works here

# See https://numpy.org/doc/stable/reference/random/generator.html to check the
# methods available in rng.
seeder.rng.integers(5)
seeder.rng.normal()
print(type(seeder.rng))
# etc

# Environments and agents should be seeded using a single seeder,
# to ensure that their random number generators are independent.
from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent

env = gym_make("MountainCar-v0")
env.reseed(seeder)

agent = RSUCBVIAgent(env)
agent.reseed(seeder)

# Environments and Agents have their own seeder and rng.
# When writing your own agents and inheriting from the Agent class,
# you should use agent.rng whenever you need to generate random numbers;
# the same applies to your environments.
# This is necessary to ensure reproducibility.
print("env seeder: ", env.seeder)
print("random sample from env rng: ", env.rng.normal())
print("agent seeder: ", agent.seeder)
print("random sample from agent rng: ", agent.rng.normal())

# A seeder can spawn other seeders that are independent from it.
# This is useful to seed two different threads, using seeder1
# in the first thread, and seeder2 in the second thread.
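
# For instance (assuming, as in rlberry's seeding utilities, that spawn(2)
# returns two independent child seeders):
seeder1, seeder2 = seeder.spawn(2)
print("child seeder samples: ", seeder1.rng.normal(), seeder2.rng.normal())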