def make_object_under_test(self, renderer):
    """Build the environment under test and store it on ``self.environment``.

    The environment uses a ``NoReward`` task, a ``SelectMove`` action space,
    a single sprite created with ``c0=255`` and a maximum episode length of 7.

    Args:
        renderer: Renderer registered under the ``'obs'`` key of the
            environment's renderers.
    """
    def _single_sprite():
        # Sprite factory handed to the environment as ``init_sprites``.
        return [sprite.Sprite(c0=255)]

    env_kwargs = {
        'task': tasks.NoReward(),
        'action_space': action_spaces.SelectMove(),
        'renderers': {'obs': renderer},
        'init_sprites': _single_sprite,
        'max_episode_length': 7,
    }
    self.environment = environment.Environment(**env_kwargs)
def make_object_under_test(self):
    """Environment creator used by test_utils.EnvironmentTestMixin.

    Returns:
        An ``environment.Environment`` with a ``NoReward`` task, a
        ``SelectMove`` action space, no renderers, a single sprite created
        with ``c0=255`` and a maximum episode length of 7.
    """
    def _single_sprite():
        # Sprite factory handed to the environment as ``init_sprites``.
        return [sprite.Sprite(c0=255)]

    return environment.Environment(
        task=tasks.NoReward(),
        action_space=action_spaces.SelectMove(),
        renderers={},
        init_sprites=_single_sprite,
        max_episode_length=7,
    )
def testConfig(self, task_module, modes=('train', 'test'), replicas=3):
    """Smoke-test a task config by running a few episodes with a fixed action.

    For every mode, and ``replicas`` times per mode, a fresh config is
    requested from ``task_module.get_config``, its renderers are replaced by
    an empty dict, and an environment is built from it. One sampled action is
    then replayed until five episode starts have been observed.

    Args:
        task_module: Object exposing ``get_config(mode=...)``.
        modes: Iterable of mode names passed to ``get_config``.
        replicas: Number of environments built (and run) per mode.
    """
    for mode in modes:
        print(mode)
        for _ in range(replicas):
            env_config = task_module.get_config(mode=mode)
            # Rendering is not needed here, so drop all renderers.
            env_config['renderers'] = {}
            env = environment.Environment(**env_config)
            env.observation_spec()

            # The same sampled action is reused for every step.
            fixed_action = env.action_space.sample()
            timestep = env.reset()
            episodes_started = 0
            while episodes_started < 5:
                if timestep.first():
                    episodes_started += 1
                timestep = env.step(fixed_action)
def main(argv):
    """Run a random agent and log success and mean reward for each episode.

    The environment config is loaded from the module named by ``FLAGS.config``
    using mode ``FLAGS.mode``; ``FLAGS.num_episodes`` episodes are run.

    Args:
        argv: Unused command-line arguments.
    """
    del argv

    config_module = importlib.import_module(FLAGS.config)
    env_config = config_module.get_config(FLAGS.mode)
    # The success renderer is added only so that success can be logged.
    env_config['renderers']['success'] = renderers.Success()

    env = environment.Environment(**env_config)
    agent = RandomAgent(env)

    for episode in range(FLAGS.num_episodes):
        timestep = env.reset()
        episode_rewards = []
        while not timestep.last():
            timestep = env.step(agent.step(timestep))
            episode_rewards.append(timestep.reward)

        # Success is read from the final timestep of the episode.
        succeeded = timestep.observation['success']
        logging.info('Episode %d: Success = %r, Reward = %s.', episode,
                     succeeded, np.nanmean(episode_rewards))
def setup_run_ui(env_config, render_size, task_hsv_colors, anti_aliasing):
    """Start a Demo UI given an env_config.

    ``env_config`` is modified in place: a ``SelectMove`` action space is
    swapped for a noisy ``DragAndDrop`` one, and the renderers are replaced
    by an image renderer plus a success renderer. The function then runs the
    interaction loop forever and never returns.

    Args:
        env_config: Dict of keyword arguments for ``environment.Environment``.
            Must contain ``'action_space'``; when that is a ``SelectMove``
            instance, ``env_config['metadata']['filter_distribs']`` is read.
        render_size: Side length used for the square rendered image.
        task_hsv_colors: If truthy, colors are converted with
            ``renderers.color_maps.hsv_to_rgb``; otherwise no conversion
            function is passed.
        anti_aliasing: Forwarded to ``renderers.PILRenderer``.

    Raises:
        ValueError: If the action space is neither ``SelectMove`` nor
            ``Embodied``.
    """
    action_space = env_config['action_space']
    if isinstance(action_space, action_spaces.SelectMove):
        # DragAndDrop is a bit easier to demo than the SelectMove action space.
        # An earlier variant used a per-dimension noise_scale of
        # np.array([0, 0, 0.025, 0.025]) instead of the scalar 0.02.
        action_space = action_spaces.DragAndDrop(
            scale=0.5,
            noise_scale=0.02,
            proportional_motion_noise=0.35,
            filter_distribs=env_config['metadata']['filter_distribs'])
        env_config['action_space'] = action_space
        agent = HumanDragAndDropAgent(action_space)
    elif isinstance(action_space, action_spaces.Embodied):
        agent = HumanEmbodiedAgent(action_space)
    else:
        raise ValueError(
            'Demo is not configured to run with action space {}.'.format(
                action_space))

    if task_hsv_colors:
        color_to_rgb = renderers.color_maps.hsv_to_rgb
    else:
        color_to_rgb = None
    image_renderer = renderers.PILRenderer(
        image_size=(render_size, render_size),
        color_to_rgb=color_to_rgb,
        anti_aliasing=anti_aliasing)
    env_config['renderers'] = {
        'image': image_renderer,
        'success': renderers.Success(),
    }

    env = environment.Environment(**env_config)
    ui = MatplotlibUI()
    agent.register_callbacks(ui)

    # The action is only handed to the UI for drag-and-drop action spaces.
    forward_action = isinstance(action_space, action_spaces.DragAndDrop)

    # Start RL loop
    timestep = env.reset()
    ui.update(timestep, action=None)
    while True:
        action = agent.step(timestep)
        timestep = env.step(action)
        ui.update(timestep, action if forward_action else None)
def main(_):
    """Launch the interactive demo for the config selected by the flags.

    The config is loaded from the module named by ``FLAGS.config`` with mode
    ``FLAGS.mode``. A ``SelectMove`` action space is swapped for
    ``DragAndDrop``, the renderers are replaced by an image renderer plus a
    success renderer, and the interaction loop then runs forever.

    Raises:
        ValueError: If the configured action space is neither ``SelectMove``
            nor ``Embodied``.
    """
    config_module = importlib.import_module(FLAGS.config)
    config = config_module.get_config(FLAGS.mode)

    action_space = config['action_space']
    if isinstance(action_space, action_spaces.SelectMove):
        # DragAndDrop is a bit easier to demo than the SelectMove action space
        action_space = action_spaces.DragAndDrop(scale=0.5)
        config['action_space'] = action_space
        agent = HumanDragAndDropAgent(action_space)
    elif isinstance(action_space, action_spaces.Embodied):
        agent = HumanEmbodiedAgent(action_space)
    else:
        raise ValueError(
            'Demo is not configured to run with action space {}.'.format(
                action_space))

    if FLAGS.task_hsv_colors:
        color_to_rgb = color_maps.hsv_to_rgb
    else:
        color_to_rgb = None
    config['renderers'] = {
        'image': renderers.PILRenderer(
            image_size=(FLAGS.render_size, FLAGS.render_size),
            color_to_rgb=color_to_rgb,
            anti_aliasing=FLAGS.anti_aliasing),
        'success': renderers.Success(),
    }

    env = environment.Environment(**config)
    demo = DemoUI()
    # Hook every callback the agent exposes into the UI.
    for event_name, callback in agent.callbacks().items():
        demo.register_callback(event_name, callback)

    # The action is only handed to the UI for drag-and-drop action spaces.
    forward_action = isinstance(action_space, action_spaces.DragAndDrop)

    timestep = env.reset()
    demo.update(timestep, action=None)
    while True:
        action = agent.step(timestep)
        timestep = env.step(action)
        demo.update(timestep, action if forward_action else None)
def testTaskTermination(self):
    """Check the first/mid/last/first step sequence around task completion.

    The environment holds one sprite at (0.25, 0.25) and a
    ``FindGoalPosition`` task with goal (0.5, 0.5). Two "do nothing" actions
    are expected to yield a first and then a mid timestep; the "success"
    action is expected to end the episode, and the step after that is
    expected to start a new one.
    """
    def _one_sprite():
        # Sprite factory handed to the environment as ``init_sprites``.
        return [sprite.Sprite(x=0.25, y=0.25, c0=255)]

    env = environment.Environment(
        tasks.FindGoalPosition(goal_position=(0.5, 0.5)),
        action_spaces.SelectMove(),
        {},
        _one_sprite)

    # Presumably a zero-motion action and one that moves the sprite onto the
    # goal -- this follows the original variable names; confirm against
    # SelectMove's action semantics.
    donothing_action = np.array([0.25, 0.25, 0.5, 0.5])
    success_action = np.array([0.25, 0.25, 0.75, 0.75])

    # Each entry: the action to take and the TimeStep predicate that must hold.
    expected_sequence = (
        (donothing_action, 'first'),
        (donothing_action, 'mid'),
        (success_action, 'last'),
        (success_action, 'first'),
    )
    for action, predicate in expected_sequence:
        timestep = env.step(action)
        self.assertTrue(getattr(timestep, predicate)())
def setup_run_ui(env_config, render_size, task_hsv_colors, anti_aliasing):
    """Start a Demo UI given an env_config.

    ``env_config`` is modified in place: a ``SelectMove`` action space is
    swapped for ``DragAndDrop(scale=0.5)``, and the renderers are replaced by
    an image renderer plus a success renderer. The function then runs the
    interaction loop forever and never returns.

    Args:
        env_config: Dict of keyword arguments for ``environment.Environment``.
            Must contain ``'action_space'``.
        render_size: Side length used for the square rendered image.
        task_hsv_colors: If truthy, colors are converted with
            ``renderers.color_maps.hsv_to_rgb``; otherwise no conversion
            function is passed.
        anti_aliasing: Forwarded to ``renderers.PILRenderer``.

    Raises:
        ValueError: If the action space is neither ``SelectMove`` nor
            ``Embodied``.
    """
    action_space = env_config['action_space']
    if isinstance(action_space, action_spaces.SelectMove):
        # DragAndDrop is a bit easier to demo than the SelectMove action space
        action_space = action_spaces.DragAndDrop(scale=0.5)
        env_config['action_space'] = action_space
        agent = HumanDragAndDropAgent(action_space)
    elif isinstance(action_space, action_spaces.Embodied):
        agent = HumanEmbodiedAgent(action_space)
    else:
        raise ValueError(
            'Demo is not configured to run with action space {}.'.format(
                action_space))

    if task_hsv_colors:
        color_to_rgb = renderers.color_maps.hsv_to_rgb
    else:
        color_to_rgb = None
    image_renderer = renderers.PILRenderer(
        image_size=(render_size, render_size),
        color_to_rgb=color_to_rgb,
        anti_aliasing=anti_aliasing)
    env_config['renderers'] = {
        'image': image_renderer,
        'success': renderers.Success(),
    }

    env = environment.Environment(**env_config)
    demo = DemoUI()
    # Hook every callback the agent exposes into the UI.
    for event_name, callback in agent.callbacks().items():
        demo.register_callback(event_name, callback)

    # The action is only handed to the UI for drag-and-drop action spaces.
    forward_action = isinstance(action_space, action_spaces.DragAndDrop)

    # Start RL loop
    timestep = env.reset()
    demo.update(timestep, action=None)
    while True:
        action = agent.step(timestep)
        timestep = env.step(action)
        demo.update(timestep, action if forward_action else None)
def main(argv):
    """Run a random agent, log per-episode results and save rendered frames.

    The environment config is loaded from the module named by ``FLAGS.config``
    using mode ``FLAGS.mode``; ``FLAGS.num_episodes`` episodes are run. The
    ``'image'`` observation of each step is copied into a fixed-size buffer,
    which is written to ``random_actions.npy`` once all episodes are done, so
    the file holds the frames of the last episode only.

    Args:
        argv: Unused command-line arguments.
    """
    del argv

    config = importlib.import_module(FLAGS.config)
    config = config.get_config(FLAGS.mode)
    config['renderers']['success'] = renderers.Success()  # Used for logging
    env = environment.Environment(**config)
    agent = RandomAgent(env)

    # Fixed-size frame buffer; assumes the config provides an 'image'
    # renderer producing 64x64x3 frames -- TODO confirm against the config.
    episode_frames = np.zeros((20, 64, 64, 3))
    max_frames = len(episode_frames)

    # Loop over episodes, logging success and max reward per episode
    for episode in range(FLAGS.num_episodes):
        timestep = env.reset()
        # Clear frames left over from the previous episode so that a shorter
        # episode does not end up mixed with stale frames in the saved array.
        episode_frames[:] = 0
        t = 0
        rewards = []
        while not timestep.last():
            # Keep only the first `max_frames` frames; writing past the end
            # of the buffer would raise IndexError on longer episodes.
            if t < max_frames:
                episode_frames[t] = timestep.observation['image']
            action = agent.step(timestep)
            timestep = env.step(action)
            rewards.append(timestep.reward)
            t += 1
        logging.info('Episode %d: Success = %r, Reward = %s.', episode,
                     timestep.observation['success'], np.nanmax(rewards))

    # np.save appends the '.npy' extension to the file name.
    np.save('random_actions', episode_frames)