Illustration of how to set up an MBQVI algorithm in rlberry. The environment chosen here is GridWorld environment. .. video:: ../../video_plot_mbqvi.mp4 :width: 600 """ # sphinx_gallery_thumbnail_path = 'thumbnails/video_plot_mbqvi.jpg' from rlberry.agents.mbqvi import MBQVIAgent from rlberry.envs.finite import GridWorld params = {} params["n_samples"] = 100 # samples per state-action pair params["gamma"] = 0.99 params["horizon"] = None env = GridWorld(7, 10, walls=((2, 2), (3, 3)), success_probability=0.6) agent = MBQVIAgent(env, **params) info = agent.fit() print(info) # evaluate policy in a deterministic version of the environment env_eval = GridWorld(7, 10, walls=((2, 2), (3, 3)), success_probability=1.0) env_eval.enable_rendering() state = env_eval.reset() for tt in range(50): action = agent.policy(state) next_s, _, _, _ = env_eval.step(action) state = next_s video = env_eval.save_video("_video/video_plot_mbqvi.mp4")
# Value iteration on a GridWorld: fit the agent, roll out its policy with
# rendering enabled, and save the rollout as a video.
from rlberry.agents.dynprog import ValueIterationAgent
from rlberry.envs.finite import GridWorld

# Upper bound on the number of steps taken in the rendered rollout.
MAX_ROLLOUT_STEPS = 200

env = GridWorld(7, 10, walls=((2, 2), (3, 3)))
agent = ValueIterationAgent(env, gamma=0.95)
info = agent.fit()
print(info)

env.enable_rendering()
state = env.reset()
for _step in range(MAX_ROLLOUT_STEPS):
    next_state, _, done, _ = env.step(agent.policy(state))
    if done:
        # Stop the rollout as soon as step() reports `done`.
        break
    state = next_state
env.save_video("gridworld.mp4", framerate=5)