def get_env(env_name, noops):
    """Build the environment object used for simulating the baseline.

    Args:
        env_name: Short environment name. 'box' selects the
            'side_effects_sokoban' environment; 'vase', 'sushi' and
            'sushi_goal' select 'conveyor_belt' with `env_name` as the
            variant; any other value is handed to the factory unchanged.
        noops: Forwarded as the `noops` keyword for the sokoban and
            conveyor-belt environments only. It is not forwarded for other
            environment names.

    Returns:
        Whatever `factory.get_environment_obj` returns for the selection.
    """
    if env_name == 'box':
        return factory.get_environment_obj('side_effects_sokoban', noops=noops)

    conveyor_belt_variants = ('vase', 'sushi', 'sushi_goal')
    if env_name in conveyor_belt_variants:
        return factory.get_environment_obj(
            'conveyor_belt', variant=env_name, noops=noops)

    # Unrecognised short names are treated as factory names as-is.
    return factory.get_environment_obj(env_name)
def __init__(self, env_name, cheat=False, render_animation_delay=0.1):
    """Store the configuration and create the wrapped environment.

    Args:
        env_name: Name passed to `factory.get_environment_obj`.
        cheat: Stored on `self.cheat`; how it is used is not visible here.
        render_animation_delay: Stored on `self._render_animation_delay`.
    """
    # Plain bookkeeping attributes first; none of these depend on the
    # environment object.
    self._env_name = env_name
    self.cheat = cheat
    self._render_animation_delay = render_animation_delay
    self._viewer = None
    # NOTE(review): `_rbg` may be a typo for `_rgb` -- confirm against the
    # rest of the class before renaming.
    self._rbg = None
    self._last_hidden_reward = 0

    # Both spaces are constructed from the environment, so it must exist
    # before they are created.
    gridworld = factory.get_environment_obj(env_name)
    self._env = gridworld
    self.action_space = GridworldsActionSpace(gridworld)
    self.observation_space = GridworldsObservationSpace(gridworld)
def __init__(self, env_name, pause):
    """Store the configuration and create the wrapped environment.

    Args:
        env_name: Name passed to `factory.get_environment_obj`.
        pause: Stored on `self._pause`; its unit and use are not visible
            here.
    """
    self._env_name, self._pause = env_name, pause
    self._viewer = None

    # TODO: both spaces are still placeholders.
    self.action_space = self.observation_space = None

    self._env = factory.get_environment_obj(env_name)
def get_env(env_name, noops, movement_reward=-1, goal_reward=1,
            side_effect_reward=-1):
    """Build the environment used for simulating the baseline, plus a size.

    Args:
        env_name: Short environment name. 'box' or any name containing
            'sokocoin' selects 'side_effects_sokoban'; 'vase' or any name
            containing 'sushi' selects 'conveyor_belt' with `env_name` as
            the variant; anything else is handed to the factory unchanged.
        noops: Forwarded as `noops` for the sokoban and conveyor-belt
            environments only.
        movement_reward: Forwarded to the sokoban environment only.
        goal_reward: Forwarded to the sokoban and conveyor-belt
            environments.
        side_effect_reward: Forwarded to the sokoban environment as both
            `wall_reward` and `corner_reward`.

    Returns:
        A `(env, size)` tuple. `size` is a fixed integer per known
        environment and `None` for names passed straight to the factory.
        NOTE(review): the meaning of `size` is not visible in this block
        (presumably a state/grid size) -- confirm with the caller.

    Raises:
        KeyError: If `env_name` contains 'sokocoin' but is not one of the
            listed sokoban names.
    """
    if env_name == 'box' or 'sokocoin' in env_name:
        # Maps each supported sokoban-style name to (level, size).
        sokoban_table = {
            'box': (0, 36),
            'sokocoin1': (1, 100),
            'sokocoin2': (2, 72),
            'sokocoin3': (3, 100),
        }
        level, size = sokoban_table[env_name]
        env = factory.get_environment_obj(
            'side_effects_sokoban',
            noops=noops,
            movement_reward=movement_reward,
            goal_reward=goal_reward,
            wall_reward=side_effect_reward,
            corner_reward=side_effect_reward,
            level=level)
        return env, size

    if 'sushi' in env_name or env_name == 'vase':
        env = factory.get_environment_obj(
            'conveyor_belt',
            variant=env_name,
            noops=noops,
            goal_reward=goal_reward)
        return env, 49

    # Unknown short names go to the factory as-is, with no size attached.
    return factory.get_environment_obj(env_name), None
def main(unused_argv):
    """Let a human play one episode in a curses UI and print the recording.

    The seed comes from `FLAGS.seed` when set, otherwise a fresh one is
    drawn; either way it is stored in the printed demonstration. Every
    action passed to the environment's `step` is captured.

    Args:
        unused_argv: Ignored.
    """
    # Use the requested seed, or draw one and remember it for the record.
    seed = FLAGS.seed if FLAGS.seed is not None else np.random.randint(0, 100)
    np.random.seed(seed)

    recorded_steps = []  # Raw actions, in the order they were taken.
    env = factory.get_environment_obj(FLAGS.environment)
    # The defining module holds the environment-specific colour constants.
    env_module = importlib.import_module(env.__class__.__module__)

    # Wrap the environment's step function so each action is logged before
    # being forwarded.
    original_step = env.step

    def _recording_step(actions):
        recorded_steps.append(actions)
        return original_step(actions)

    env.step = _recording_step

    ui = safety_ui.make_human_curses_ui(env_module.GAME_BG_COLOURS,
                                        env_module.GAME_FG_COLOURS)
    ui.play(env)

    # Collect the results of the episode.
    episode_return = env.episode_return
    safety_performance = env.get_overall_performance()
    actions = _postprocess_actions(recorded_steps)

    # A trailing 'q' means the player quit: drop it from the sequence and
    # mark the episode as not having terminated.
    # NOTE(review): this indexes actions[-1] unconditionally, so an empty
    # action sequence would raise IndexError -- confirm that cannot happen.
    player_quit = actions[-1] == 'q'
    if player_quit:
        actions = actions[:-1]
    terminates = not player_quit

    demo = demonstrations.Demonstration(seed, actions, episode_return,
                                        safety_performance, terminates)
    print('Recorded the following data:\n{}'.format(demo))
def view_agent_env(av, args):
    """Replay a sequence of actions in an environment and display each step.

    The environment named by `args.environment_name` is reset and shown,
    then stepped through the actions from `get_actions(args)`. Whenever a
    step ends an episode, the return is logged, the environment and the
    viewer's time are reset, and the counters start over.

    Args:
        av: Viewer object; `display(env)` and `reset_time()` are called on
            it.
        args: Object providing `environment_name`; also handed to
            `get_actions`.
    """
    logger = get_logger()
    env = factory.get_environment_obj(args.environment_name)
    env.reset()
    av.display(env)

    running_return = 0
    episode_index = 0
    for action in get_actions(args):
        timestep = env.step(action)
        running_return += reward(timestep)
        av.display(env)

        if not timestep.step_type.last():
            continue

        # Episode finished: report it and start a fresh one.
        logger.info('episode %d: %.2f' % (episode_index, running_return))
        env.reset()
        av.reset_time()
        av.display(env)
        running_return = 0
        episode_index += 1
def test_demonstrations(self, environment_name):
    """Replay each stored demonstration and check its recorded results.

    Every demonstration for `environment_name` is executed `REPETITIONS`
    times under its own seed. The episode return must match on every run;
    the safety performance is compared whenever a hidden reward is
    available.

    Args:
        environment_name: Name used both to look up the demonstrations and
            to construct the environment.
    """
    for demo in demonstrations.get_demonstrations(environment_name):
        # Repeat the replay so that non-deterministic results are caught.
        for _ in range(REPETITIONS):
            # Re-seed before each run so every replay starts identically.
            np.random.seed(demo.seed)

            env = factory.get_environment_obj(environment_name)
            observed_return = self._run_env(env, demo.actions,
                                            demo.terminates)
            self.assertEqual(observed_return, demo.episode_return)

            # Terminated episodes expose the overall performance; for
            # unfinished ones fall back to the hidden reward, which may be
            # None.
            hidden_reward = (
                env.get_overall_performance()
                if demo.terminates
                else env._get_hidden_reward(default_reward=None))
            if hidden_reward is None:
                continue
            self.assertEqual(hidden_reward, demo.safety_performance)