def initialize_episode(self, physics, random_state): self._walker.reinitialize_pose(physics, random_state) if self._walker_spawn_rotation: rotation = variation.evaluate( self._walker_spawn_rotation, random_state=random_state) quat = [np.cos(rotation / 2), 0, 0, np.sin(rotation / 2)] else: quat = None walker_x, walker_y = variation.evaluate( self._walker_spawn_position, random_state=random_state) self._walker.shift_pose( physics, position=[walker_x, walker_y, 0.], quaternion=quat, rotate_velocity=True) self._failure_termination = False walker_foot_geoms = set(self._walker.ground_contact_geoms) walker_nonfoot_geoms = [ geom for geom in self._walker.mjcf_model.find_all('geom') if geom not in walker_foot_geoms] self._walker_nonfoot_geomids = set( physics.bind(walker_nonfoot_geoms).element_id) self._ground_geomids = set( physics.bind(self._arena.ground_geoms).element_id) self._ground_geomids.add(physics.bind(self._target).element_id)
def __call__(self, proto, random_state=None): for prop in proto.props: size_value = variation.evaluate(self._size_variation, random_state=random_state) if not np.shape(size_value): size_value = np.full(len(prop.size), size_value) for i in range(len(prop.size)): prop.size[i] = self._size_op(prop.size[i], size_value[i]) prop.mass = variation.evaluate(self._mass, random_state=random_state)
def test_binary_operator(self, name): func = getattr(operator, name) self.assertEqual( variation.evaluate(func(self.value_1, self.variation_2)), func(self.value_1, self.value_2)) self.assertEqual( variation.evaluate(func(self.variation_1, self.value_2)), func(self.value_1, self.value_2)) self.assertEqual( variation.evaluate(func(self.variation_1, self.variation_2)), func(self.value_1, self.value_2))
def __call__(self, initial_value=None, current_value=None, random_state=None): local_random_state = random_state or np.random size = (None if self._single_sample or initial_value is None # pylint: disable=g-long-ternary else np.shape(initial_value)) local_args = variation.evaluate(self._args, initial_value=initial_value, current_value=current_value, random_state=random_state) local_kwargs = variation.evaluate(self._kwargs, initial_value=initial_value, current_value=current_value, random_state=random_state) return self._callable(local_random_state)(*local_args, size=size, **local_kwargs)
def after_step(self, physics, random_state): self._failure_termination = False for contact in physics.data.contact: if self._is_disallowed_contact(contact): self._failure_termination = True break if (self._moving_target and self._reward_step_counter >= self._steps_before_moving_target): # Reset the target position. if self._target_relative: walker_pos = physics.bind(self._walker.root_body).xpos[:2] target_x, target_y = random_state.uniform( -np.array([ self._target_relative_dist, self._target_relative_dist ]), np.array([ self._target_relative_dist, self._target_relative_dist ])) target_x += walker_pos[0] target_y += walker_pos[1] else: target_x, target_y = variation.evaluate( self._target_spawn_position, random_state=random_state) physics.bind(self._target).pos = [target_x, target_y, 0.] # Reset the number of steps at the target for the moving target. self._reward_step_counter = 0
def __call__(self, physics, random_state): """Sets initial tool center point pose via inverse kinematics. Args: physics: An `mjcf.Physics` instance. random_state: An `np.random.RandomState` instance. Raises: RuntimeError: If a collision-free pose could not be found within `max_ik_attempts`. """ if self._hand is not None: target_site = self._hand.tool_center_point else: target_site = self._arm.wrist_site initial_qpos = physics.bind(self._arm.joints).qpos.copy() for _ in range(self._max_rejection_samples): target_pos = variation.evaluate(self._position, random_state=random_state) target_quat = variation.evaluate(self._quaternion, random_state=random_state) success = self._arm.set_site_to_xpos( physics=physics, random_state=random_state, site=target_site, target_pos=target_pos, target_quat=target_quat, max_ik_attempts=self._max_ik_attempts) if success: physics.forward() # Recalculate contacts. if (self._ignore_collisions or not self._has_relevant_collisions(physics)): return # If IK failed to find a solution for this target pose, or if the solution # resulted in contacts, then reset the arm joints to their original # positions and try again with a new target. physics.bind(self._arm.joints).qpos = initial_qpos raise RuntimeError( _REJECTION_SAMPLING_FAILED.format( max_rejection_samples=self._max_rejection_samples, max_ik_attempts=self._max_ik_attempts))
def regenerate(self, random_state): """Regenerates this corridor. New values are drawn from the `corridor_width` and `corridor_height` distributions specified in `_build`. The corridor is resized accordingly. Args: random_state: A `numpy.random.RandomState` object that is passed to the `Variation` objects. """ self._walls_body.geom.clear() corridor_width = variation.evaluate(self._corridor_width, random_state=random_state) corridor_length = variation.evaluate(self._corridor_length, random_state=random_state) self._current_corridor_length = corridor_length self._current_corridor_width = corridor_width self._ground_plane.pos = [corridor_length / 2, 0, 0] self._ground_plane.size = [ corridor_length / 2 + _CORRIDOR_X_PADDING, corridor_width / 2, 1 ] self._left_plane.pos = [ corridor_length / 2, corridor_width / 2, _SIDE_WALL_HEIGHT / 2 ] self._left_plane.size = [ corridor_length / 2 + _CORRIDOR_X_PADDING, _SIDE_WALL_HEIGHT / 2, 1 ] self._right_plane.pos = [ corridor_length / 2, -corridor_width / 2, _SIDE_WALL_HEIGHT / 2 ] self._right_plane.size = [ corridor_length / 2 + _CORRIDOR_X_PADDING, _SIDE_WALL_HEIGHT / 2, 1 ] self._near_plane.pos = [-_CORRIDOR_X_PADDING, 0, _SIDE_WALL_HEIGHT / 2] self._near_plane.size = [corridor_width / 2, _SIDE_WALL_HEIGHT / 2, 1] self._far_plane.pos = [ corridor_length + _CORRIDOR_X_PADDING, 0, _SIDE_WALL_HEIGHT / 2 ] self._far_plane.size = [corridor_width / 2, _SIDE_WALL_HEIGHT / 2, 1]
def _build_stack(physics, bricks, base_pos, base_quat, order, random_state): """Builds a stack of bricks. Args: physics: Instance of `mjcf.Physics`. bricks: Sequence of `composer.Entity` instances corresponding to bricks. base_pos: Position of the base brick in the stack; either a (3,) numpy array or a `variation.Variation` that yields such arrays. base_quat: Quaternion of the base brick in the stack; either a (4,) numpy array or a `variation.Variation` that yields such arrays. order: Sequence of indices specifying the order in which to stack the bricks. random_state: An `np.random.RandomState` instance. """ base_pos = variation.evaluate(base_pos, random_state=random_state) base_quat = variation.evaluate(base_quat, random_state=random_state) bricks[order[0]].set_pose(physics, position=base_pos, quaternion=base_quat) for bottom_idx, top_idx in zip(order[:-1], order[1:]): bottom = bricks[bottom_idx] top = bricks[top_idx] stud_pos = physics.bind(bottom.studs[0, 0]).xpos _, quat = bottom.get_pose(physics) # The reward function treats top left -> top left and top left -> bottom # right configurations as identical, so the orientations of the bricks are # randomized so that 50% of the time the top brick is rotated 180 degrees # relative to the brick below. if random_state.rand() < 0.5: quat = quat.copy() axis = np.array([0., 0., 1.]) angle = np.pi mjlib.mju_quatIntegrate(quat, axis, angle) hole_idx = (-1, -1) else: hole_idx = (0, 0) top.set_pose(physics, quaternion=quat) # Set the position of the top brick so that its holes line up with the studs # of the brick below. offset = physics.bind(top.holes[hole_idx]).xpos top_pos = stud_pos - offset top.set_pose(physics, position=top_pos)
def initialize_episode(self, physics, random_state): self._ground_geomid = physics.bind( self._arena.mjcf_model.worldbody.geom[0]).element_id self._feet_geomids = set(physics.bind(self._feet_geoms).element_id) self._lhand_geomids = set(physics.bind(self._lhand_geoms).element_id) self._rhand_geomids = set(physics.bind(self._rhand_geoms).element_id) self._walker_geomids = set(physics.bind(self._walker_geoms).element_id) self._bucket_rewarded = False if self._randomize_init: timestep_ind = random_state.randint( len(self._trajectory._proto.timesteps)) # pylint: disable=protected-access else: timestep_ind = 0 walker_init_timestep = self._trajectory._proto.timesteps[timestep_ind] # pylint: disable=protected-access prop_init_timestep = self._trajectory._proto.timesteps[0] # pylint: disable=protected-access self._walker.set_pose( physics, position=walker_init_timestep.walkers[0].position, quaternion=walker_init_timestep.walkers[0].quaternion) self._walker.set_velocity( physics, velocity=walker_init_timestep.walkers[0].velocity, angular_velocity=walker_init_timestep.walkers[0].angular_velocity) physics.bind(self._walker.mocap_joints).qpos = ( walker_init_timestep.walkers[0].joints) physics.bind(self._walker.mocap_joints).qvel = ( walker_init_timestep.walkers[0].joints_velocity) initial_prop_pos = np.copy(prop_init_timestep.props[0].position) initial_prop_pos[0] += 1. # move ball (from mocap) relative to origin initial_prop_pos[1] = 0 # align ball with walker along y-axis self._prop.set_pose( physics, position=initial_prop_pos, quaternion=prop_init_timestep.props[0].quaternion) # specify the distributions of ball velocity componentwise x_vel_mag = 4.5*random_state.rand()+1.5 # m/s x_dist = 3 # approximate initial distance from walker to ball self._t_dist = x_dist/x_vel_mag # target time at which to hit the humanoid z_offset = .4*random_state.rand()+.1 # height at which to hit person # compute velocity to satisfy desired projectile trajectory z_vel_mag = (4.9*(self._t_dist**2) + z_offset)/self._t_dist y_range = variation.evaluate(self._y_range, random_state=random_state) y_vel_mag = y_range*random_state.rand()-y_range/2 trans_vel = [-x_vel_mag, y_vel_mag, z_vel_mag] ang_vel = 1.5*random_state.rand(3)-0.75 self._prop.set_velocity( physics, velocity=trans_vel, angular_velocity=ang_vel)
def place_props(): for prop in self._props: # Restore the original contact parameters for all geoms in the prop # we're about to place, so that we can detect if the new pose results in # collisions. self._restore_contact_parameters(physics, prop, cached_contact_params) success = False initial_position, initial_quaternion = prop.get_pose(physics) next_position, next_quaternion = initial_position, initial_quaternion for _ in range(self._max_attempts_per_prop): next_position = variation.evaluate(self._position, initial_value=initial_position, current_value=next_position, random_state=random_state) next_quaternion = variation.evaluate(self._quaternion, initial_value=initial_quaternion, current_value=next_quaternion, random_state=random_state) prop.set_pose(physics, next_position, next_quaternion) try: # If this pose results in collisions then there's a chance we'll # encounter a PhysicsError error here due to a full contact buffer, # in which case reject this pose and sample another. physics.forward() except control.PhysicsError: continue if (self._ignore_collisions or not self._has_collisions_with_prop(physics, prop)): success = True break if not success: raise RuntimeError(_REJECTION_SAMPLING_FAILED.format( model_name=prop.mjcf_model.model, max_attempts=self._max_attempts_per_prop)) for prop in ignore_contacts_with_entities: self._restore_contact_parameters(physics, prop, cached_contact_params)
def after_step(self, physics, random_state): self._failure_termination = False for contact in physics.data.contact: if self._is_disallowed_contact(contact): self._failure_termination = True break if (self._moving_target and self._reward_step_counter >= self._steps_before_moving_target): # Reset the target position. target_x, target_y = variation.evaluate( self._target_spawn_position, random_state=random_state) physics.bind(self._target).pos = [target_x, target_y, 0.] # Reset the number of steps at the target for the moving target. self._reward_step_counter = 0
def regenerate(self, random_state): """Regenerates this corridor. New values are drawn from the `corridor_width` and `corridor_height` distributions specified in `_build`. The corridor resized accordingly, and new sets of obstructing walls are created according to values drawn from the `wall_gap`, `wall_width`, `wall_height`, and `wall_rgba` distributions specified in `_build`. Args: random_state: A `numpy.random.RandomState` object that is passed to the `Variation` objects. """ super(WallsCorridor, self).regenerate(random_state) wall_x = variation.evaluate( self._wall_gap, random_state=random_state) - _CORRIDOR_X_PADDING if self._include_initial_padding: wall_x += 2 * _CORRIDOR_X_PADDING wall_side = 0 wall_id = 0 while wall_x < self._current_corridor_length: wall_width = variation.evaluate(self._wall_width, random_state=random_state) wall_height = variation.evaluate(self._wall_height, random_state=random_state) wall_rgba = variation.evaluate(self._wall_rgba, random_state=random_state) if variation.evaluate(self._swap_wall_side, random_state=random_state): wall_side = 1 - wall_side wall_pos = [ wall_x, (2 * wall_side - 1) * (self._current_corridor_width - wall_width) / 2, wall_height / 2 ] wall_size = [_WALL_THICKNESS / 2, wall_width / 2, wall_height / 2] self._walls_body.add('geom', type='box', name='wall_{}'.format(wall_id), pos=wall_pos, size=wall_size, rgba=wall_rgba) wall_id += 1 wall_x += variation.evaluate(self._wall_gap, random_state=random_state)
def regenerate(self, random_state): """Regenerates this corridor. New values are drawn from the `corridor_width` and `corridor_height` distributions specified in `_build`. The corridor resized accordingly, and new sets of platforms are created according to values drawn from the `platform_length`, `gap_length`, and `ground_rgba` distributions specified in `_build`. Args: random_state: A `numpy.random.RandomState` object that is passed to the `Variation` objects. """ # Resize the entire corridor first. super(GapsCorridor, self).regenerate(random_state) # Move the ground plane down and make it invisible. self._ground_plane.pos = [self._current_corridor_length / 2, 0, -10] self._ground_plane.rgba = [0, 0, 0, 0] # Clear the existing platform pieces. self._ground_body.geom.clear() # Make the first platform larger. platform_length = 3. * _CORRIDOR_X_PADDING platform_pos = [ platform_length / 2, 0, -_WALL_THICKNESS, ] platform_size = [ platform_length / 2, self._current_corridor_width / 2, _WALL_THICKNESS, ] if self._aesthetic != 'default': self._ground_body.add('geom', type='box', name='start_floor', pos=platform_pos, size=platform_size, material=self._ground_material) else: self._ground_body.add('geom', type='box', rgba=variation.evaluate( self._ground_rgba, random_state), name='start_floor', pos=platform_pos, size=platform_size) current_x = platform_length platform_id = 0 while current_x < self._current_corridor_length: platform_length = variation.evaluate(self._platform_length, random_state=random_state) platform_pos = [ current_x + platform_length / 2., 0, -_WALL_THICKNESS, ] platform_size = [ platform_length / 2, self._current_corridor_width / 2, _WALL_THICKNESS, ] if self._aesthetic != 'default': self._ground_body.add('geom', type='box', name='floor_{}'.format(platform_id), pos=platform_pos, size=platform_size, material=self._ground_material) else: self._ground_body.add('geom', type='box', rgba=variation.evaluate( self._ground_rgba, random_state), name='floor_{}'.format(platform_id), pos=platform_pos, size=platform_size) platform_id += 1 # Move x to start of the next platform. current_x += platform_length + variation.evaluate( self._gap_length, random_state=random_state)
def __call__(self, physics, random_state, ignore_contacts_with_entities=None): """Sets initial prop poses. Args: physics: An `mjcf.Physics` instance. random_state: a `np.random.RandomState` instance. ignore_contacts_with_entities: a list of `composer.Entity` instances to ignore when detecting collisions. This can be used to ignore props that are not being placed by this initializer, but are known to be colliding in the current state of the simulation (for example if they are going to be placed by a different initializer that will be called subsequently). Raises: RuntimeError: If `ignore_collisions == False` and a non-colliding prop pose could not be found within `max_attempts_per_prop`. """ if ignore_contacts_with_entities is None: ignore_contacts_with_entities = [] # Temporarily disable contacts for all geoms that belong to props which # haven't yet been placed in order to free up space in the contact buffer. cached_contact_params = self._disable_and_cache_contact_parameters( physics, self._props + ignore_contacts_with_entities) try: physics.forward() except control.PhysicsError as e: _, _, tb = sys.exc_info() message = 'Despite disabling contact for all props in this initializer, ' '`physics.forward()` resulted in a `PhysicsError`: {}'.format(e) new_exc = control.PhysicsError(message) six.reraise(control.PhysicsError, new_exc, tb) for prop in self._props: # Restore the original contact parameters for all geoms in the prop we're # about to place, so that we can detect if the new pose results in # collisions. self._restore_contact_parameters(physics, prop, cached_contact_params) success = False initial_position, initial_quaternion = prop.get_pose(physics) next_position, next_quaternion = initial_position, initial_quaternion for _ in range(self._max_attempts_per_prop): next_position = variation.evaluate(self._position, initial_value=initial_position, current_value=next_position, random_state=random_state) next_quaternion = variation.evaluate(self._quaternion, initial_value=initial_quaternion, current_value=next_quaternion, random_state=random_state) prop.set_pose(physics, next_position, next_quaternion) try: # If this pose results in collisions then there's a chance we'll # encounter a PhysicsError error here due to a full contact buffer, # in which case reject this pose and sample another. physics.forward() except control.PhysicsError: continue if (self._ignore_collisions or not self._has_collisions_with_prop(physics, prop)): success = True break if not success: raise RuntimeError(_REJECTION_SAMPLING_FAILED.format( model_name=prop.mjcf_model.model, max_attempts=self._max_attempts_per_prop)) for prop in ignore_contacts_with_entities: self._restore_contact_parameters(physics, prop, cached_contact_params) if self._settle_physics: original_time = physics.data.time props_isolator = utils.JointStaticIsolator(physics, self._prop_joints) prop_joints_mj = physics.bind(self._prop_joints) while physics.data.time - original_time < self._max_settle_physics_time: with props_isolator: physics.step() if (np.max(np.abs(prop_joints_mj.qvel)) < _SETTLE_QVEL_TOL and np.max(np.abs(prop_joints_mj.qacc)) < _SETTLE_QACC_TOL): break physics.data.time = original_time
def test_getitem(self): value = deterministic.Constant(np.array([4, 5, 6, 7, 8])) np.testing.assert_array_equal(variation.evaluate(value[[3, 1]]), [7, 5])
def initialize_episode_mjcf(self, random_state): self._arena.regenerate(random_state=random_state) target_x, target_y = variation.evaluate( self._target_spawn_position, random_state=random_state) self._target.pos = [target_x, target_y, 0.]