def _fit_worker(args):
    """Create and fit an agent instance."""
    (
        lock,
        agent_handler,
        agent_class,
        fit_budget,
        init_kwargs,
        fit_kwargs,
        writer,
        worker_logging_level,
        seeder,
    ) = args

    # reseed external libraries
    set_external_seed(seeder)

    # logging level in thread
    configure_logging(worker_logging_level)

    # Using a lock when creating envs and agents, to avoid problems
    # as here: https://github.com/openai/gym/issues/281
    with lock:
        if agent_handler.is_empty():
            # create agent
            agent = agent_class(**init_kwargs)
            # seed agent
            # TODO: check if extra reseeding here is necessary
            agent.reseed(seeder)
            agent_handler.set_instance(agent)

    # set writer
    if writer[0] is None:
        agent_handler.set_writer(None)
    elif writer[0] != "default":
        # 'default' corresponds to DefaultWriter created by Agent.__init__()
        writer_fn = writer[0]
        writer_kwargs = writer[1]
        agent_handler.set_writer(writer_fn(**writer_kwargs))

    # fit agent
    agent_handler.fit(fit_budget, **fit_kwargs)

    # Remove writer after fit (prevent pickle problems),
    # unless the agent uses DefaultWriter
    if not isinstance(agent_handler.writer, DefaultWriter):
        agent_handler.set_writer(None)

    # remove from memory to avoid pickle issues
    agent_handler.dump()

    # garbage collector
    gc.collect()

    return agent_handler
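# For orientation: a minimal, hypothetical sketch of the dispatch pattern that
# a worker like _fit_worker relies on. All inputs are packed into a single
# tuple (the shape expected when mapping the worker over a process pool), and
# a shared lock guards the creation step. The names below (_dummy_fit_worker,
# worker_id, the "instance" dict) are illustrative stand-ins, not rlberry API.
from multiprocessing import Manager, Pool


def _dummy_fit_worker(args):
    # Unpack the single tuple argument, mirroring _fit_worker's signature.
    lock, worker_id, fit_budget = args
    with lock:
        # Critical section: build resources that should not be created
        # concurrently (e.g. environments), as _fit_worker does.
        instance = {"id": worker_id}
    # "Fit" outside the lock so workers can run in parallel.
    instance["trained_for"] = fit_budget
    return instance


if __name__ == "__main__":
    manager = Manager()
    lock = manager.Lock()  # a picklable lock proxy shared by all workers
    args_list = [(lock, ii, 100) for ii in range(4)]
    with Pool(processes=2) as pool:
        print(pool.map(_dummy_fit_worker, args_list))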
from rlberry.envs import Acrobot
from rlberry.agents import RSKernelUCBVIAgent
from rlberry.utils.logging import configure_logging
from rlberry.wrappers import RescaleRewardWrapper

configure_logging("DEBUG")

env = Acrobot()
# rescale rewards to [0, 1]
env = RescaleRewardWrapper(env, (0.0, 1.0))
agent = RSKernelUCBVIAgent(env,
                           n_episodes=500,
                           gamma=0.99,
                           horizon=300,
                           bonus_scale_factor=0.01,
                           min_dist=0.2,
                           bandwidth=0.05,
                           beta=1.0,
                           kernel_type="gaussian")
agent.fit()

env.enable_rendering()
state = env.reset()
time_before_done = 0
ended = False
for tt in range(4 * agent.horizon):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    # count the steps taken before the first terminal state is reached
    if not done and not ended:
        time_before_done += 1
    ended = ended or done
    state = next_state
env.render()
__path__ = __import__('pkgutil').extend_path(__path__, __name__)

# Initialize seeding
from rlberry.seeding import seeding
seeding.set_global_seed()

# Initialize logging level
from rlberry.utils.logging import configure_logging
configure_logging(level="INFO")
import numpy as np

from rlberry.agents.cem import CEMAgent
from rlberry.envs.benchmarks.ball_exploration import PBall2D
import rlberry.seeding as seeding
from rlberry.utils.logging import configure_logging

configure_logging(level="DEBUG")

seeding.set_global_seed(123)

env = PBall2D(p=np.inf,
              reward_smoothness=np.array([0.8]),
              reward_centers=[np.array([0.4, 0.4])])

n_episodes = 500
batch_size = 100
horizon = 25
gamma = 0.99

agent = CEMAgent(env,
                 n_episodes,
                 horizon,
                 gamma,
                 batch_size,
                 percentile=70,
                 learning_rate=0.01)
agent.fit()

env.enable_rendering()
state = env.reset()
for tt in range(4 * horizon):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state
env.render()