def cmu_humanoid_run_gaps(random_state=None):
    """Requires a CMU humanoid to run down a corridor with gaps.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    # Position-controlled CMU humanoid with its egocentric camera enabled.
    humanoid = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Corridor whose platform and gap sizes are drawn uniformly at random
    # on every episode.
    gaps_corridor = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(.3, 2.5),
        gap_length=distributions.Uniform(.5, 1.25),
        corridor_width=10,
        corridor_length=100)

    # Reward the walker for traversing the corridor at a target velocity.
    run_task = corr_tasks.RunThroughCorridor(
        walker=humanoid,
        arena=gaps_corridor,
        walker_spawn_position=(0.5, 0, 0),
        target_velocity=3.0,
        physics_timestep=0.005,
        control_timestep=0.03)

    return composer.Environment(
        task=run_task,
        time_limit=30,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)
def load(team_size, time_limit=45., random_state=None):
    """Construct `team_size`-vs-`team_size` soccer environment.

    Args:
      team_size: Integer, the number of players per team. Must be between 1 and
        11.
      time_limit: Float, the maximum duration of each episode in seconds.
      random_state: (optional) an int seed or `np.random.RandomState` instance.

    Returns:
      A `composer.Environment` instance.

    Raises:
      ValueError: If `team_size` is not between 1 and 11.
    """
    # BUG FIX: the docstring and the error message both require
    # 1 <= team_size <= 11, but the original check (`team_size < 0`) silently
    # accepted team_size == 0.
    if team_size < 1 or team_size > 11:
        raise ValueError(
            "Team size must be between 1 and 11 (received %d)." % team_size)
    return composer.Environment(
        task=Task(
            players=_make_players(team_size),
            arena=RandomizedPitch(
                min_size=(32, 24), max_size=(48, 36), keep_aspect_ratio=True),
        ),
        time_limit=time_limit,
        random_state=random_state)
def cmu_humanoid_run_walls(random_state=None):
    """Requires a CMU humanoid to run down a corridor obstructed by walls.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    # CMU humanoid under position control, egocentric camera on.
    humanoid = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Corridor blocked by walls of randomized width.
    walls_arena = corr_arenas.WallsCorridor(
        wall_gap=4.,
        wall_width=distributions.Uniform(1, 7),
        wall_height=3.0,
        corridor_width=10,
        corridor_length=100,
        include_initial_padding=False)

    # Reward running along the corridor at a fixed target speed.
    run_task = corr_tasks.RunThroughCorridor(
        walker=humanoid,
        arena=walls_arena,
        walker_spawn_position=(0.5, 0, 0),
        target_velocity=3.0,
        physics_timestep=0.005,
        control_timestep=0.03)

    return composer.Environment(
        task=run_task,
        time_limit=30,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)
def build_env(reward_type,
              ghost_offset=0,
              clip_name='CMU_016_22',
              start_step=0,
              force_magnitude=0,
              disable_observables=True,
              termination_error_threshold=1e10):
    """Builds a mocap-tracking environment for a single CMU clip.

    Note: the walker is passed to the task as the class itself (a factory,
    not an instance); the tracking task instantiates it internally.
    """
    walker_factory = cmu_humanoid.CMUHumanoidPositionControlledV2020
    floor_arena = floors.Floor()
    tracking_task = tracking.MultiClipMocapTracking(
        walker=walker_factory,
        arena=floor_arena,
        ref_path=cmu_mocap_data.get_path_for_cmu(version='2020'),
        dataset=types.ClipCollection(ids=[clip_name]),
        ref_steps=(1, 2, 3, 4, 5),
        start_step=start_step,
        max_steps=256,
        reward_type=reward_type,
        always_init_at_clip_start=True,
        termination_error_threshold=termination_error_threshold,
        ghost_offset=ghost_offset,
        force_magnitude=force_magnitude,
        disable_observables=disable_observables,
    )
    # Seed from the command-line flag so runs are reproducible.
    return composer.Environment(
        task=tracking_task,
        random_state=np.random.RandomState(seed=FLAGS.seed))
def load(team_size,
         time_limit=45.,
         random_state=None,
         disable_walker_contacts=False,
         walker_type=WalkerType.BOXHEAD):
    """Construct `team_size`-vs-`team_size` soccer environment.

    Args:
      team_size: Integer, the number of players per team. Must be between 1 and
        11.
      time_limit: Float, the maximum duration of each episode in seconds.
      random_state: (optional) an int seed or `np.random.RandomState` instance.
      disable_walker_contacts: (optional) if `True`, disable physical contacts
        between walkers.
      walker_type: the type of walker to instantiate in the environment.

    Returns:
      A `composer.Environment` instance.

    Raises:
      ValueError: If `team_size` is not between 1 and 11.
      ValueError: If `walker_type` is not recognized.
    """
    # BUG FIX: the docstring documents a ValueError for out-of-range team
    # sizes, but the original implementation never validated `team_size`.
    if team_size < 1 or team_size > 11:
        raise ValueError(
            "Team size must be between 1 and 11 (received %d)." % team_size)
    # Unrecognized `walker_type` values are expected to be rejected by
    # `_make_players` — TODO confirm against its implementation.
    return composer.Environment(
        task=Task(
            players=_make_players(team_size, walker_type),
            arena=RandomizedPitch(
                min_size=(32, 24), max_size=(48, 36), keep_aspect_ratio=True),
            disable_walker_contacts=disable_walker_contacts),
        time_limit=time_limit,
        random_state=random_state)
def test_multiple_goals(self):
    """Runs a fixed-length episode and checks per-team accumulated rewards."""
    initializer = _ScoringInitializer()
    time_limit = 1.0
    control_timestep = 0.025
    env = composer.Environment(
        task=soccer.MultiturnTask(
            players=_home_team(1) + _away_team(1),
            arena=soccer.Pitch((20, 15), field_box=True),  # disable throw-in.
            initializer=initializer,
            control_timestep=control_timestep),
        time_limit=time_limit)

    timestep = env.reset()
    num_steps = 0
    # One accumulator per player, shaped/typed to match the reward spec.
    rewards = [np.zeros(spec.shape, spec.dtype) for spec in env.reward_spec()]
    while not timestep.last():
        actions = [spec.generate_value() for spec in env.action_spec()]
        timestep = env.step(actions)
        for accumulator, r_t in zip(rewards, timestep.reward):
            accumulator += r_t
        num_steps += 1

    self.assertEqual(num_steps, time_limit / control_timestep)

    num_scores = initializer.num_calls - 1  # discard initialization.
    self.assertEqual(num_scores, 6)
    # Home team collects +1 per score, away team -1.
    self.assertEqual(rewards, [
        np.full((), num_scores, np.float32),
        np.full((), -num_scores, np.float32)
    ])
def testActivation(self):
    """A falling prop activates the target sphere; reset() clears activation."""
    target_radius = 0.6
    prop_radius = 0.1
    target_height = 1
    arena = floors.Floor()
    target = target_sphere.TargetSphere(radius=target_radius,
                                        height_above_ground=target_height)
    prop = primitive.Primitive(geom_type='sphere', size=[prop_radius])
    arena.attach(target)
    arena.add_free_entity(prop)
    task = composer.NullTask(arena)
    # Drop the prop from z=2 at the start of every episode.
    task.initialize_episode = (
        lambda physics, random_state: prop.set_pose(physics, [0, 0, 2]))
    env = composer.Environment(task)
    env.reset()
    # Above this height the prop geom cannot yet overlap the target sphere.
    max_activated_height = target_height + target_radius + prop_radius
    # While the prop is still falling: not activated, material fully opaque.
    while env.physics.bind(prop.geom).xpos[2] > max_activated_height:
        self.assertFalse(target.activated)
        self.assertEqual(env.physics.bind(target.material).rgba[-1], 1)
        env.step([])
    # Once within reach: activated, and the target is rendered transparent.
    while env.physics.bind(prop.geom).xpos[2] > 0.2:
        self.assertTrue(target.activated)
        self.assertEqual(env.physics.bind(target.material).rgba[-1], 0)
        env.step([])
    # Target should be reset when the environment is reset.
    env.reset()
    self.assertFalse(target.activated)
    self.assertEqual(env.physics.bind(target.material).rgba[-1], 1)
def ant_run_walls():
    """Ant running through a walled corridor (contact termination disabled).

    Returns:
        A `composer.Environment` instance.
    """
    ant_walker = ant.Ant()
    walls_arena = corr_arenas.WallsCorridor(
        wall_gap=4.,
        wall_width=distributions.Uniform(1, 7),
        wall_height=3.0,
        corridor_width=10,
        corridor_length=250,
        include_initial_padding=False)
    run_task = corr_tasks.RunThroughCorridor(
        walker=ant_walker,
        arena=walls_arena,
        walker_spawn_position=(0.5, 0, 0),
        walker_spawn_rotation=0,
        stand_height=0.2,
        contact_termination=False,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP)
    # NOTE(review): a custom reward override (as used by the planar-walker
    # builder) was sketched here but never enabled; the task's default reward
    # is used.
    return composer.Environment(
        task=run_task,
        time_limit=30,
        strip_singleton_obs_buffer_dim=True)
def walker_run_gaps(random_state=None):
    """Planar walker running over a gap corridor with a custom reward.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    walker = planar_walker.PlanarWalker()

    # Corridor with uniformly randomized platform and gap sizes.
    arena = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(1.25, 2.5),  # (0.3, 2.5)
        gap_length=distributions.Uniform(0.3, 0.7),  # (0.5, 1.25)
        corridor_width=10,
        corridor_length=250)

    # Reward the agent for running down the corridor.
    task = corr_tasks.RunThroughCorridor(
        walker=walker,
        arena=arena,
        walker_spawn_position=(1.0, 0, 0),
        stand_height=1.2,
        contact_termination=False,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP)

    # Bind the custom reward function as this instance's `get_reward`.
    # BUG FIX: the original passed `task.get_reward` (a bound method) as the
    # *owner* argument of `__get__`; the owner should be the class, and may
    # simply be omitted when binding to an instance.
    task.get_reward = _walker_get_reward.__get__(task)

    return composer.Environment(
        task=task,
        time_limit=30,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)
def _setup_basic_gtt_task(self, num_targets=1, reward_scale=1.0):
    """Builds a go-to-target predicate task and stores task/env/walker on self."""
    ant_walker = walkers.Ant()
    room = arenas.padded_room.PaddedRoom(
        room_size=8, num_objects=2, pad_with_walls=True)
    maze_arena = arenas.MazeWithTargets(maze=room)
    # One visible position detector per requested target.
    detectors = [
        props.PositionDetector(
            pos=[0, 0, 0.5],
            size=[0.5, 0.5, 0.5],
            inverted=False,
            visible=True)
        for _ in range(num_targets)
    ]
    move_predicates = [
        predicates.MoveWalkerToRandomTarget(ant_walker, detectors)
    ]
    self._task = predicate_task.PredicateTask(
        walker=ant_walker,
        maze_arena=maze_arena,
        predicates=move_predicates,
        targets=detectors,
        randomize_num_predicates=False,
        reward_scale=reward_scale,
        terminating_reward_bonus=2.0,
    )
    self._env = composer.Environment(
        self._task, random_state=np.random.RandomState(12345))
    self._walker = ant_walker
    self._targets = detectors
def test_error_too_few_targets(self):
    """Reset must fail when the maze offers fewer sites than requested targets."""
    ant_walker = walkers.Ant()
    num_targets = 5
    room = arenas.padded_room.PaddedRoom(
        room_size=8, num_objects=2, pad_with_walls=True)
    maze_arena = arenas.MazeWithTargets(maze=room)
    detectors = [
        props.PositionDetector(
            pos=[0, 0, 0.5],
            size=[0.5, 0.5, 0.5],
            inverted=False,
            visible=True)
        for _ in range(num_targets)
    ]
    task = predicate_task.PredicateTask(
        walker=ant_walker,
        maze_arena=maze_arena,
        predicates=[predicates.MoveWalkerToRandomTarget(ant_walker, detectors)],
        targets=detectors,
        randomize_num_predicates=False,
        reward_scale=1.0,
        terminating_reward_bonus=2.0,
    )
    env = composer.Environment(task, random_state=np.random.RandomState(12345))
    expected_message = (
        "The generated maze does not contain enough target positions for the "
        "requested number of props (0) and targets (5): got 2.")
    with self.assertRaisesWithLiteralMatch(RuntimeError, expected_message):
        env.reset()
def jumping_ball_run_gaps(random_state=None):
    """Jumping-ball walker running over a randomized gap corridor.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    ball_walker = jumping_ball.JumpingBallWithHead()

    # Gap and platform sizes are uniformly randomized per episode.
    gaps_arena = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(1.0, 2.5),  # (0.3, 2.5)
        gap_length=distributions.Uniform(0.3, 0.7),  # (0.5, 1.25)
        corridor_width=10,
        corridor_length=250)

    # Reward forward progress at the target velocity.
    run_task = corr_tasks.RunThroughCorridor(
        walker=ball_walker,
        arena=gaps_arena,
        walker_spawn_position=(1.0, 0, 0),
        target_velocity=3.0,
        contact_termination=False,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(
        task=run_task,
        time_limit=30,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)
def _build_rodent_corridor_gaps():
    """Build environment where a rodent runs over gaps."""
    rat_walker = walkers.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)},
    )
    # Randomized gap/platform lengths, outdoor look.
    gaps_arena = arenas.corridors.GapsCorridor(
        corridor_width=2,
        platform_length=distributions.Uniform(low=0.4, high=0.8),
        gap_length=distributions.Uniform(low=0.05, high=0.2),
        corridor_length=40,
        aesthetic='outdoor_natural')
    rodent_task = tasks.corridors.RunThroughCorridor(
        walker=rat_walker,
        arena=gaps_arena,
        walker_spawn_position=(5, 0, 0),
        walker_spawn_rotation=0,
        target_velocity=1.0,
        contact_termination=False,
        terminate_at_height=-0.3,
        physics_timestep=0.001,
        control_timestep=.02)
    return composer.Environment(
        task=rodent_task,
        time_limit=30,
        strip_singleton_obs_buffer_dim=True)
def _build_rodent_two_touch_env():
    """Build environment where a rodent touches targets."""
    rat_walker = walkers.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)},
    )
    floor_arena = arenas.floors.Floor(
        size=(10., 10.), aesthetic='outdoor_natural')
    touch_task = tasks.reach.TwoTouch(
        walker=rat_walker,
        arena=floor_arena,
        target_builders=[
            functools.partial(
                props.target_sphere.TargetSphereTwoTouch, radius=0.025),
        ],
        randomize_spawn_rotation=True,
        target_type_rewards=[25.],
        shuffle_target_builders=False,
        target_area=(1.5, 1.5),
        physics_timestep=0.001,
        control_timestep=.02)
    return composer.Environment(
        task=touch_task,
        time_limit=30,
        strip_singleton_obs_buffer_dim=True)
def rodent_run_gaps(random_state=None):
    """Requires a rodent to run down a corridor with gaps.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    # Rodent walker with its egocentric camera enabled.
    rat_walker = rodent.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Corridor with uniformly randomized platform and gap lengths.
    gaps_arena = corr_arenas.GapsCorridor(
        platform_length=distributions.Uniform(.4, .8),
        gap_length=distributions.Uniform(.05, .2),
        corridor_width=2,
        corridor_length=40,
        aesthetic='outdoor_natural')

    # Reward running at the target velocity; terminate if the rodent falls
    # below the corridor surface.
    run_task = corr_tasks.RunThroughCorridor(
        walker=rat_walker,
        arena=gaps_arena,
        walker_spawn_position=(5, 0, 0),
        walker_spawn_rotation=0,
        target_velocity=1.0,
        contact_termination=False,
        terminate_at_height=-0.3,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(
        task=run_task,
        time_limit=30,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)
def rodent_two_touch(random_state=None):
    """Requires a rodent to tap an orb, wait an interval, and tap it again.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    rat_walker = rodent.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)})
    floor_arena = floors.Floor(size=(10., 10.), aesthetic='outdoor_natural')
    touch_task = reach.TwoTouch(
        walker=rat_walker,
        arena=floor_arena,
        target_builders=[
            functools.partial(
                target_sphere.TargetSphereTwoTouch, radius=0.025),
        ],
        randomize_spawn_rotation=True,
        target_type_rewards=[25.],
        shuffle_target_builders=False,
        target_area=(1.5, 1.5),
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )
    return composer.Environment(
        task=touch_task,
        time_limit=30,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)
def test_termination_and_discount(self):
    """Failure termination fires only after the walker lands upside down."""
    walker = cmu_humanoid.CMUHumanoid()
    arena = floors.Floor()
    task = go_to_target.GoToTarget(walker=walker, arena=arena)
    random_state = np.random.RandomState(12345)
    env = composer.Environment(task, random_state=random_state)
    env.reset()
    zero_action = np.zeros_like(env.physics.data.ctrl)
    # Walker starts in upright position.
    # Should not trigger failure termination in the first few steps.
    for _ in range(5):
        env.step(zero_action)
        self.assertFalse(task.should_terminate_episode(env.physics))
        np.testing.assert_array_equal(task.get_discount(env.physics), 1)
    # Rotate the walker upside down and run the physics until it makes contact.
    current_time = env.physics.data.time
    walker.shift_pose(env.physics, position=(0, 0, 10), quaternion=(0, 1, 0, 0))
    env.physics.forward()
    while env.physics.data.ncon == 0:
        env.physics.step()
    # Rewind the clock so the settling steps above don't consume episode time.
    env.physics.data.time = current_time
    # Should now trigger a failure termination.
    env.step(zero_action)
    self.assertTrue(task.should_terminate_episode(env.physics))
    np.testing.assert_array_equal(task.get_discount(env.physics), 0)
def test_reward_fixed_target(self):
    """Reward is 1 while on the (static) target and 0 away from it."""
    walker = cmu_humanoid.CMUHumanoid()
    arena = floors.Floor()
    task = go_to_target.GoToTarget(
        walker=walker, arena=arena, moving_target=False)
    env = composer.Environment(task, random_state=np.random.RandomState(12345))
    env.reset()

    target_position = task.target_position(env.physics)
    zero_action = np.zeros_like(env.physics.data.ctrl)

    # Away from the target: no reward for a couple of steps.
    for _ in range(2):
        timestep = env.step(zero_action)
        self.assertEqual(timestep.reward, 0)

    # Teleport the walker onto the target, keeping its current height.
    walker_pos = env.physics.bind(walker.root_body).xpos
    walker.set_pose(
        env.physics,
        position=[target_position[0], target_position[1], walker_pos[2]])
    env.physics.forward()

    # Receive reward while the agent remains at that location.
    timestep = env.step(zero_action)
    self.assertEqual(timestep.reward, 1)

    # Target position should not change.
    np.testing.assert_array_equal(target_position,
                                  task.target_position(env.physics))
def test_prop_factory(self):
    """Props are instantiated, observed, and tracked when a prop_factory is set."""
    task = tracking.MultiClipMocapTracking(
        walker=self.walker,
        arena=self.arena,
        ref_path=self.test_data,
        dataset=types.ClipCollection(ids=('cmuv2019_001', 'cmuv2019_002')),
        ref_steps=(0,),
        min_steps=1,
        disable_props=False,
        prop_factory=props.Prop,
    )
    env = composer.Environment(task=task)
    observation = env.reset().observation
    # Test the expected prop observations exist and have the expected size.
    # Dims: 3 for position, 4 for orientation quaternion.
    dims = [3, 4]
    for key, dim in zip(REFERENCE_PROP_KEYS, dims):
        self.assertIn(key, task.observables)
        self.assertSequenceEqual(observation[key].shape, (N_PROPS, dim))
    # Since no ghost offset was specified, test that there are no ghost props.
    self.assertEmpty(task._ghost_props)
    # Test that props go to the expected location on reset.
    for ref_key, obs_key in zip(REFERENCE_PROP_KEYS, PROP_OBSERVATION_KEYS):
        np.testing.assert_array_equal(observation[ref_key],
                                      observation[obs_key])
    # Test that prop position contributes to termination error: perturb the
    # first prop away from its reference pose and expect a positive error.
    task._set_walker(env.physics)
    wrong_position = observation[REFERENCE_PROP_KEYS[0]] + np.ones(3)
    task._props[0].set_pose(env.physics, wrong_position)
    task.after_step(env.physics, 0)
    self.assertGreater(task._termination_error, 0.)
def test_ghost_prop(self):
    """With a ghost offset, ghost props exist and shadow the reference pose."""
    task = tracking.MultiClipMocapTracking(
        walker=self.walker,
        arena=self.arena,
        ref_path=self.test_data,
        dataset=types.ClipCollection(ids=('cmuv2019_001', 'cmuv2019_002')),
        ref_steps=(0,),
        min_steps=1,
        disable_props=False,
        prop_factory=props.Prop,
        ghost_offset=GHOST_OFFSET,
    )
    env = composer.Environment(task=task)

    # Ghost props must be present when a ghost offset is specified.
    self.assertLen(task._ghost_props, N_PROPS)

    # After one step, the ghost prop follows the goal trajectory (plus offset).
    env.reset()
    action = env.action_spec().generate_value()
    observation = env.step(action).observation
    ghost_pos, ghost_quat = task._ghost_props[0].get_pose(env.physics)
    goal_pos = np.squeeze(observation[REFERENCE_PROP_KEYS[0]])
    goal_quat = np.squeeze(observation[REFERENCE_PROP_KEYS[1]])
    np.testing.assert_array_equal(np.array(ghost_pos), goal_pos + GHOST_OFFSET)
    np.testing.assert_array_equal(ghost_quat, goal_quat)
def test_contact(self):
    """Rodent should survive the first few steps of the bowl escape task."""
    rat = rodent.Rat()
    # Bowl-shaped arena with an outdoor look.
    bowl_arena = bowl.Bowl(size=(20., 20.), aesthetic='outdoor_natural')
    escape_task = escape.Escape(
        walker=rat,
        arena=bowl_arena,
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP)
    env = composer.Environment(
        escape_task, random_state=np.random.RandomState(12345))
    env.reset()

    zero_action = np.zeros_like(env.physics.data.ctrl)

    # The walker starts upright, so no failure termination is expected in the
    # first few steps and the discount stays at 1.
    for _ in range(5):
        env.step(zero_action)
        self.assertFalse(escape_task.should_terminate_episode(env.physics))
        np.testing.assert_array_equal(
            escape_task.get_discount(env.physics), 1)
def test_enabled_reference_observables(self):
    """Only the requested reference observable is exposed; sensors stay on."""
    task = tracking.MultiClipMocapTracking(
        walker=self.walker,
        arena=self.arena,
        ref_path=self.test_data,
        dataset=types.ClipCollection(ids=('cmuv2019_001', 'cmuv2019_002')),
        ref_steps=(1, 2, 3, 4, 5),
        min_steps=1,
        reward_type='comic',
        enabled_reference_observables=('walker/reference_rel_joints',)
    )
    env = composer.Environment(task=task)
    first_timestep = env.reset()

    observation_keys = first_timestep.observation.keys()
    self.assertIn('walker/reference_rel_joints', observation_keys)
    self.assertNotIn('walker/reference_rel_root_pos_local', observation_keys)

    # Check that all desired (proprioceptive and sensor) observables remain
    # enabled despite the reference-observable filter.
    expected_enabled = (
        list(task._walker.observables.proprioception) +
        list(task._walker.observables.kinematic_sensors) +
        list(task._walker.observables.dynamic_sensors))
    for observable in expected_enabled:
        self.assertTrue(observable.enabled)
def build_vision_warehouse(random_state=None):
    """Build canonical 4-pedestal, 2-prop task.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    # Position-controlled CMU humanoid with egocentric vision.
    humanoid = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Props are rescaled and re-massed at random each episode.
    resizer = mocap_loader.PropResizer(
        size_factor=distributions.Uniform(low=0.75, high=1.25),
        mass=distributions.Uniform(low=2, high=7))
    carry_task = warehouse.PhasedBoxCarry(
        walker=humanoid,
        num_props=2,
        num_pedestals=4,
        proto_modifier=resizer,
        negative_reward_on_failure_termination=True)

    # Unlimited reset attempts: episode initialization may need retries.
    return composer.Environment(
        task=carry_task,
        time_limit=15,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
        max_reset_attempts=float('inf'))
def build_vision_toss(random_state=None):
    """Build canonical ball tossing task.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    # Position-controlled CMU humanoid with egocentric vision.
    humanoid = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # The ball's size and mass are randomized per episode.
    resizer = mocap_loader.PropResizer(
        size_factor=distributions.Uniform(low=0.95, high=1.5),
        mass=distributions.Uniform(low=2, high=4))
    toss_task = ball_toss.BallToss(
        walker=humanoid,
        proto_modifier=resizer,
        negative_reward_on_failure_termination=True,
        priority_friction=True,
        bucket_offset=3.,
        y_range=0.5,
        toss_delay=1.5,
        randomize_init=True)

    # Unlimited reset attempts: episode initialization may need retries.
    return composer.Environment(
        task=toss_task,
        time_limit=6,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True,
        max_reset_attempts=float('inf'))
def load(environment_name,
         env_kwargs=None,
         seed=None,
         time_limit=float('inf'),
         strip_singleton_obs_buffer_dim=False):
    """Loads an environment from board_games.

    Args:
      environment_name: String, the name of the environment to load. Must be in
        `ALL`.
      env_kwargs: extra params to pass to task creation.
      seed: Optional, either an int seed or an `np.random.RandomState` object.
        If None (default), the random number generator will self-seed from a
        platform-dependent source of entropy.
      time_limit: (optional) A float, the time limit in seconds beyond which an
        episode is forced to terminate.
      strip_singleton_obs_buffer_dim: (optional) A boolean, if `True`, the array
        shape of observations with `buffer_size == 1` will not have a leading
        buffer dimension.

    Returns:
      An instance of `composer.Environment`.
    """
    constructor = _registry.get_constructor(environment_name)
    # Only forward kwargs when explicitly provided.
    task = constructor(**env_kwargs) if env_kwargs is not None else constructor()
    return _composer.Environment(
        task=task,
        time_limit=time_limit,
        strip_singleton_obs_buffer_dim=strip_singleton_obs_buffer_dim,
        random_state=seed)
def test_observables(self):
    """Joint-position observable is present in the observation after reset."""
    rat = rodent.Rat()
    floor_arena = floors.Floor(size=(10., 10.), aesthetic='outdoor_natural')
    touch_task = reach.TwoTouch(
        walker=rat,
        arena=floor_arena,
        target_builders=[
            functools.partial(
                target_sphere.TargetSphereTwoTouch, radius=0.025),
        ],
        randomize_spawn_rotation=True,
        target_type_rewards=[25.],
        shuffle_target_builders=False,
        target_area=(1.5, 1.5),
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )
    env = composer.Environment(
        touch_task, random_state=np.random.RandomState(12345))
    first_timestep = env.reset()
    self.assertIn('walker/joints_pos', first_timestep.observation)
def cmu_humanoid_tracking(random_state=None):
    """Requires a CMU humanoid to track motion capture reference data.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    # FIX: the original docstring ("run down a corridor obstructed by walls")
    # was copy-pasted from the corridor builders and misdescribed this task.

    # The position-controlled CMU humanoid class is passed as a factory; the
    # tracking task instantiates the walker itself.
    walker_type = cmu_humanoid.CMUHumanoidPositionControlledV2020

    # Build an empty arena.
    arena = arenas.Floor()

    # Build a task that rewards the agent for tracking motion capture
    # reference data.
    task = tracking.MultiClipMocapTracking(
        walker=walker_type,
        arena=arena,
        ref_path=cmu_mocap_data.get_path_for_cmu_2020(),
        dataset='walk_tiny',
        ref_steps=(1, 2, 3, 4, 5),
        min_steps=10,
        reward_type='comic',
    )

    return composer.Environment(
        task=task,
        time_limit=30,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)
def _build_humanoid_walls_env(): """Build humanoid walker walls environment.""" walker = walkers.CMUHumanoidPositionControlled( name='walker', observable_options={'egocentric_camera': dict(enabled=True)}, ) wall_width = distributions.Uniform(low=1, high=7) wall_height = distributions.Uniform(low=2.5, high=4.0) swap_wall_side = distributions.Bernoulli(prob=0.5) wall_r = distributions.Uniform(low=0.5, high=0.6) wall_g = distributions.Uniform(low=0.21, high=0.41) wall_rgba = colors.RgbVariation(r=wall_r, g=wall_g, b=0, alpha=1) arena = arenas.WallsCorridor(wall_gap=5.0, wall_width=wall_width, wall_height=wall_height, swap_wall_side=swap_wall_side, wall_rgba=wall_rgba, corridor_width=10, corridor_length=100) humanoid_task = tasks.RunThroughCorridor( walker=walker, arena=arena, walker_spawn_rotation=1.57, # pi / 2 physics_timestep=0.005, control_timestep=0.03) raw_env = composer.Environment(time_limit=30, task=humanoid_task, strip_singleton_obs_buffer_dim=True) return raw_env
def cmu_humanoid_heterogeneous_forage(random_state=None):
    """Requires a CMU humanoid to find all items of a particular type in a maze.

    Args:
        random_state: (optional) an int seed or `np.random.RandomState`.

    Returns:
        A `composer.Environment` instance.
    """
    # Maze layout: P marks the player spawn, G a goal location.
    level = ('*******\n'
             '* *\n'
             '* P *\n'
             '* *\n'
             '* G *\n'
             '* *\n'
             '*******\n')

    # Position-controlled CMU humanoid with egocentric vision.
    humanoid = cmu_humanoid.CMUHumanoidPositionControlled(
        observable_options={'egocentric_camera': dict(enabled=True)})

    # Textures for the skybox, walls and floor.
    skybox_texture = labmaze_textures.SkyBox(style='sky_03')
    wall_textures = labmaze_textures.WallTextures(style='style_01')
    floor_textures = labmaze_textures.FloorTextures(style='style_01')

    # Fixed maze layout with randomized goal placement.
    maze = fixed_maze.FixedMazeWithRandomGoals(
        entity_layer=level,
        variations_layer=None,
        num_spawns=1,
        num_objects=6,
    )
    maze_arena = mazes.MazeWithTargets(
        maze=maze,
        xy_scale=3.0,
        z_height=2.0,
        skybox_texture=skybox_texture,
        wall_textures=wall_textures,
        floor_textures=floor_textures,
    )

    # Two target types: the first (green-toned) pays +30, the second
    # (red-toned) pays -10, matched by order with target_type_rewards.
    green_target = functools.partial(
        target_sphere.TargetSphere,
        radius=0.4,
        rgb1=(0, 0.4, 0),
        rgb2=(0, 0.7, 0))
    red_target = functools.partial(
        target_sphere.TargetSphere,
        radius=0.4,
        rgb1=(0.4, 0, 0),
        rgb2=(0.7, 0, 0))
    forage_task = random_goal_maze.ManyHeterogeneousGoalsMaze(
        walker=humanoid,
        maze_arena=maze_arena,
        target_builders=[green_target, red_target],
        randomize_spawn_rotation=False,
        target_type_rewards=[30., -10.],
        target_type_proportions=[1, 1],
        shuffle_target_builders=True,
        aliveness_reward=0.01,
        control_timestep=.03,
    )

    return composer.Environment(
        task=forage_task,
        time_limit=25,
        random_state=random_state,
        strip_singleton_obs_buffer_dim=True)
def _env(players, disable_walker_contacts=True, observables=None):
    """Builds a short (1-second) soccer environment for the given players."""
    soccer_task = soccer.Task(
        players=players,
        arena=soccer.Pitch((20, 15)),
        observables=observables,
        disable_walker_contacts=disable_walker_contacts,
    )
    return composer.Environment(task=soccer_task, time_limit=1)