Exemplo n.º 1
0
def test_gym_copy_reseeding_2():
    """Check that a reseeded deep copy of a nested-wrapped gym env diverges.

    The gym ``Acrobot-v1`` environment is wrapped twice in ``Wrapper`` and
    once in ``RescaleRewardWrapper``, reseeded from a ``Seeder(123)``, then
    deep-copied. The copy is reseeded on its own, and 500-step trajectories
    of the two environments are required to differ. The test body is a
    no-op when gym is not installed or the environment is not online.
    """
    seeder = Seeder(123)
    if not _GYM_INSTALLED:
        # Nothing to check without gym.
        return

    # Nested wrapping around the raw gym environment.
    base_env = gym.make("Acrobot-v1")
    wrapped = RescaleRewardWrapper(Wrapper(Wrapper(base_env)), (0, 1))
    wrapped.reseed(seeder)

    clone = deepcopy(wrapped)
    clone.reseed()

    # The online check is done on a throwaway copy of the wrapped env.
    if not deepcopy(wrapped).is_online():
        return

    original_traj = get_env_trajectory(wrapped, 500)
    clone_traj = get_env_trajectory(clone, 500)
    trajectories_match = compare_trajectories(original_traj, clone_traj)
    assert not trajectories_match
def test_rescale_wrapper_seeding(ModelClass):
    """Check that global seeding drives ``RescaleRewardWrapper`` trajectories.

    Three wrapped instances of ``ModelClass`` are built after setting the
    global seed to 123, 456 and 123 again. For online environments, the
    500-step trajectories of the first and second must differ, while those
    of the first and third (same seed) must match.

    Parameters
    ----------
    ModelClass : callable
        Zero-argument constructor of the environment to wrap.
    """
    wrapped_envs = []
    for seed in (123, 456, 123):
        # The seed is set right before each environment is constructed.
        seeding.set_global_seed(seed)
        wrapped_envs.append(RescaleRewardWrapper(ModelClass(), (0, 1)))
    first_env, second_env, third_env = wrapped_envs

    # The online check is done on a throwaway copy of the first env.
    if not deepcopy(first_env).is_online():
        return

    first_traj = get_env_trajectory(first_env, 500)
    second_traj = get_env_trajectory(second_env, 500)
    third_traj = get_env_trajectory(third_env, 500)

    different_seed_match = compare_trajectories(first_traj, second_traj)
    assert not different_seed_match
    same_seed_match = compare_trajectories(first_traj, third_traj)
    assert same_seed_match
Exemplo n.º 3
0
from rlberry.envs import Acrobot
from rlberry.agents import RSKernelUCBVIAgent
from rlberry.utils.logging import configure_logging
from rlberry.wrappers import RescaleRewardWrapper

# Configure logging with the "DEBUG" setting before anything else runs.
configure_logging("DEBUG")

env = Acrobot()
# Rescale rewards to [0, 1].
env = RescaleRewardWrapper(env, (0.0, 1.0))

# Build the agent on the wrapped environment with the hyperparameters below.
agent = RSKernelUCBVIAgent(env,
                           n_episodes=500,
                           gamma=0.99,
                           horizon=300,
                           bonus_scale_factor=0.01,
                           min_dist=0.2,
                           bandwidth=0.05,
                           beta=1.0,
                           kernel_type="gaussian")
agent.fit()

# Enable rendering, then reset the environment to obtain the initial state
# used by the rollout that follows.
env.enable_rendering()
state = env.reset()

time_before_done = 0
ended = False
for tt in range(4 * agent.horizon):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    if not done and not ended:
Exemplo n.º 4
0
""" 
 ===================== 
 Demo: demo_gym_wrapper 
 =====================
"""
from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent
from rlberry.wrappers import RescaleRewardWrapper

env = gym_make("Acrobot-v1")
env.reward_range = (-1.0, 0.0)  # missing in gym implementation

# Rescale rewards to [0, 1].
env = RescaleRewardWrapper(env, (0.0, 1.0))

agent = RSUCBVIAgent(env,
                     gamma=0.99,
                     horizon=200,
                     bonus_scale_factor=0.1,
                     min_dist=0.2)
agent.fit(budget=10)

# Roll out the fitted policy for at most 200 rendered steps. The rollout is
# wrapped in try/finally so the environment is closed even if a step, the
# policy, or the renderer raises.
try:
    state = env.reset()
    for tt in range(200):
        action = agent.policy(state)
        next_state, reward, done, _ = env.step(action)
        state = next_state
        env.render()
        if done:
            # Calling step() again after the episode has ended is undefined
            # behaviour for gym environments, so stop the rollout here.
            break
finally:
    env.close()