Example #1
def _fit_worker(args):
    """Create and fit an agent instance"""
    (
        lock,
        agent_handler,
        agent_class,
        fit_budget,
        init_kwargs,
        fit_kwargs,
        writer,
        worker_logging_level,
        seeder,
    ) = args

    # reseed external libraries
    set_external_seed(seeder)

    # set the logging level inside the worker
    configure_logging(worker_logging_level)

    # Using a lock when creating envs and agents, to avoid problems
    # as here: https://github.com/openai/gym/issues/281
    with lock:
        if agent_handler.is_empty():
            # create agent
            agent = agent_class(**init_kwargs)
            # seed agent
            # TODO: check if extra reseeding here is necessary
            agent.reseed(seeder)
            agent_handler.set_instance(agent)

    # set writer
    if writer[0] is None:
        agent_handler.set_writer(None)
    elif (
        writer[0] != "default"
    ):  # 'default' corresponds to DefaultWriter created by Agent.__init__()
        writer_fn = writer[0]
        writer_kwargs = writer[1]
        agent_handler.set_writer(writer_fn(**writer_kwargs))
    # fit agent
    agent_handler.fit(fit_budget, **fit_kwargs)

    # Remove writer after fit (prevent pickle problems),
    # unless the agent uses DefaultWriter
    if not isinstance(agent_handler.writer, DefaultWriter):
        agent_handler.set_writer(None)

    # remove from memory to avoid pickle issues
    agent_handler.dump()

    # garbage collector
    gc.collect()

    return agent_handler
Example #2
from rlberry.envs import Acrobot
from rlberry.agents import RSKernelUCBVIAgent
from rlberry.utils.logging import configure_logging
from rlberry.wrappers import RescaleRewardWrapper

configure_logging("DEBUG")

env = Acrobot()
# rescale rewards to [0, 1]
env = RescaleRewardWrapper(env, (0.0, 1.0))

agent = RSKernelUCBVIAgent(env,
                           n_episodes=500,
                           gamma=0.99,
                           horizon=300,
                           bonus_scale_factor=0.01,
                           min_dist=0.2,
                           bandwidth=0.05,
                           beta=1.0,
                           kernel_type="gaussian")
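# train the agent for the configured number of episodes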
agent.fit()

env.enable_rendering()
state = env.reset()

time_before_done = 0
ended = False
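# run the learned policy for 4 * horizon steps, tracking when the episode first ends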
for tt in range(4 * agent.horizon):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    if not done and not ended:
        # count steps until the episode first terminates
        time_before_done += 1
    if done:
        ended = True
    state = next_state
Example #3
__path__ = __import__('pkgutil').extend_path(__path__, __name__)

# Initialize seeding
from rlberry.seeding import seeding
seeding.set_global_seed()

# Initialize logging level
from rlberry.utils.logging import configure_logging
configure_logging(level="INFO")
Example #4
import numpy as np
from rlberry.agents.cem import CEMAgent
from rlberry.envs.benchmarks.ball_exploration import PBall2D
import rlberry.seeding as seeding
from rlberry.utils.logging import configure_logging

configure_logging(level="DEBUG")

seeding.set_global_seed(123)

env = PBall2D(p=np.inf, reward_smoothness=np.array([0.8]),
              reward_centers=[np.array([0.4, 0.4])])
n_episodes = 500
batch_size = 100
horizon = 25
gamma = 0.99

agent = CEMAgent(env, n_episodes, horizon, gamma, batch_size,
                 percentile=70, learning_rate=0.01)
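# train the cross-entropy method agent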
agent.fit()

env.enable_rendering()
state = env.reset()
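# run the learned policy for a few horizons and record the trajectory for rendering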
for tt in range(4*horizon):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state

env.render()