Пример #1
0
"""
 Illustration of how to set up an MBQVI algorithm in rlberry.
 The environment chosen here is the GridWorld environment.

.. video:: ../../video_plot_mbqvi.mp4
   :width: 600

"""
# sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_mbqvi.jpg'
from rlberry.agents.mbqvi import MBQVIAgent
from rlberry.envs.finite import GridWorld

# Keyword arguments forwarded to the MBQVI agent constructor.
params = {
    "n_samples": 100,  # samples per state-action pair
    "gamma": 0.99,
    "horizon": None,
}

# Grid settings shared by the training and evaluation environments.
grid_shape = (7, 10)
grid_walls = ((2, 2), (3, 3))

# Fit the agent on an environment with success_probability=0.6.
env = GridWorld(*grid_shape, walls=grid_walls, success_probability=0.6)
agent = MBQVIAgent(env, **params)
info = agent.fit()
print(info)

# Evaluate the policy in a deterministic version of the environment
# (same grid, success_probability=1.0), recording frames for the video.
env_eval = GridWorld(*grid_shape, walls=grid_walls, success_probability=1.0)
env_eval.enable_rendering()
state = env_eval.reset()
for _step in range(50):
    # Fixed-length rollout: the reward, done flag and info are discarded.
    state, _, _, _ = env_eval.step(agent.policy(state))
video = env_eval.save_video("_video/video_plot_mbqvi.mp4")
Пример #2
0
from rlberry.agents.dynprog import ValueIterationAgent
from rlberry.envs.finite import GridWorld

# Build the grid environment and fit a value-iteration agent on it.
env = GridWorld(7, 10, walls=((2, 2), (3, 3)))
agent = ValueIterationAgent(env, gamma=0.95)
info = agent.fit()
print(info)

# Rendering is enabled before the rollout so that a video can be saved.
env.enable_rendering()

# Follow the agent's policy for at most 200 steps, stopping as soon as
# the environment reports that the episode is done.
state = env.reset()
for _step in range(200):
    observation, _, finished, _ = env.step(agent.policy(state))
    if finished:
        break
    state = observation

env.save_video("gridworld.mp4", framerate=5)