def _load_model(self):
    """Build the MJCF task model (arena, robot, cube) and store it in self.model."""
    super()._load_model()
    self.mujoco_robot.set_base_xpos([0, 0, 0])

    # Table-top arena for the workspace.
    self.mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()

    # The panda robot has a pedestal; shift the arena so the table lines up with it.
    arena_x = 0.16 + self.table_full_size[0] / 2
    self.mujoco_arena.set_origin([arena_x, 0, 0])

    # Single object of interest: a small red cube with slightly randomized size.
    lift_cube = BoxObject(
        size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
        size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
        rgba=[1, 0, 0, 1],
    )
    self.mujoco_objects = OrderedDict([("cube", lift_cube)])

    # Combine arena, robot and objects into a single task model and place objects.
    self.model = TableTopTask(
        self.mujoco_arena,
        self.mujoco_robot,
        self.mujoco_objects,
        initializer=self.placement_initializer,
    )
    self.model.place_objects()
def _load_model(self):
    """Build the MJCF task model (arena + robot, no manipulable objects)."""
    super()._load_model()
    self.mujoco_robot.set_base_xpos([0, 0, 0])

    # Table-top arena for the workspace.
    self.mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()

    # The sawyer robot has a pedestal; shift the arena so the table lines up with it.
    self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0])

    # This task variant has no objects of interest.
    self.mujoco_objects = OrderedDict([])

    # Combine arena, robot and (empty) object set into a single task model.
    self.model = TableTopTask(
        self.mujoco_arena,
        self.mujoco_robot,
        self.mujoco_objects,
        initializer=self.placement_initializer,
    )
    self.model.place_objects()
def _load_model(self):
    """Build the MJCF task model (arena, robot, fully-frictional cube)."""
    super()._load_model()
    self.mujoco_robot.set_base_xpos([0, 0, 0])

    # Table-top arena for the workspace.
    self.mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()

    # The panda robot has a pedestal; shift the arena so the table lines up with it.
    self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0])

    # Object of interest. In original robosuite, a simple domain randomization
    # is included in the BoxObject implementation and called here; that
    # implementation is deliberately discarded in favor of explicit
    # size/friction/density parameters held on self.
    lift_cube = FullyFrictionalBoxObject(
        size=self.boxobject_size,
        friction=self.boxobject_friction,
        density=self.boxobject_density,
        rgba=[1, 0, 0, 1],
    )
    self.mujoco_objects = OrderedDict([("cube", lift_cube)])

    # Combine arena, robot and objects into a single task model and place objects.
    self.model = TableTopTask(
        self.mujoco_arena,
        self.mujoco_robot,
        self.mujoco_objects,
        initializer=self.placement_initializer,
    )
    self.model.place_objects()
def _load_model(self):
    """Build the manipulation task model: arena, robot, cube, and goal marker."""
    super()._load_model()

    # Place the robot base at the table-appropriate offset.
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # Table-top workspace; the arena itself always sits at the origin.
    arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )
    arena.set_origin([0, 0, 0])

    # Objects of interest: the cube to move and a small, heavy goal marker.
    self.cube = BoxObject(
        name="cube",
        size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
        size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
        rgba=[1, 0, 0, 1],
    )
    self.goal = BoxObject(
        name="goal",
        size_min=[0.005, 0.005, 0.005],
        size_max=[0.005, 0.005, 0.005],
        rgba=[0, 0, 1, 1],
        density=11342,  # Density of lead such that the goal is hard to move
    )

    # Reuse a caller-supplied placement initializer when given; otherwise
    # fall back to a uniform sampler over the table surface.
    if self.placement_initializer is None:
        self.placement_initializer = UniformRandomSampler(
            name="ObjectSampler",
            mujoco_objects=[self.cube, self.goal],
            x_range=[-0.20, 0.20],
            y_range=[-0.20, 0.20],
            rotation=None,
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            reference_pos=self.table_offset,
            z_offset=0.01,
        )
    else:
        self.placement_initializer.reset()
        self.placement_initializer.add_objects([self.cube, self.goal])

    # Final task: arena + robot model(s) + objects of interest.
    self.model = ManipulationTask(
        mujoco_arena=arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=[self.cube, self.goal],
    )
def _load_model(self):
    """Build the two-arm pot-lifting task model (arena, robots, pot)."""
    super()._load_model()

    # Position the robot base(s) according to the chosen configuration.
    if self.env_configuration == "bimanual":
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)
    elif self.env_configuration == "single-arm-opposed":
        # Rotate the two arms from their default pose so they face each other.
        for arm, yaw in zip(self.robots, (np.pi / 2, -np.pi / 2)):
            base = arm.robot_model.base_xpos_offset["table"](self.table_full_size[0])
            rot = np.array((0, 0, yaw))
            arm.robot_model.set_base_xpos(T.euler2mat(rot) @ np.array(base))
            arm.robot_model.set_base_ori(rot)
    else:
        # "single-arm-parallel": arms side by side, offset from the centerline.
        for arm, y_shift in zip(self.robots, (-0.25, 0.25)):
            base = arm.robot_model.base_xpos_offset["table"](self.table_full_size[0])
            arm.robot_model.set_base_xpos(np.array(base) + np.array((0, y_shift, 0)))

    # Table-top workspace; the arena always sits at the zero origin.
    self.mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=(0, 0, 0.8),
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()
    self.mujoco_arena.set_origin([0, 0, 0])

    # The two-handled pot is the single object of interest.
    self.pot = PotWithHandlesObject(name="pot")
    self.mujoco_objects = OrderedDict([("pot", self.pot)])

    # Assemble everything into the task model and place the pot.
    self.model = ManipulationTask(
        mujoco_arena=self.mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.mujoco_objects,
        visual_objects=None,
        initializer=self.placement_initializer,
    )
    self.model.place_objects()
def _load_model(self):
    """Build the door-opening task model (arena, single-arm robot, anchored door)."""
    super()._load_model()

    # This task only supports a single-armed robot.
    assert isinstance(self.robots[0], SingleArm), \
        "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

    # Place the robot base at the table-appropriate offset.
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # Table-top workspace; the arena always sits at the zero origin.
    self.mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_offset=self.table_offset,
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()
    self.mujoco_arena.set_origin([0, 0, 0])

    # The door (optionally latched); joints=[] ensures the door object does
    # not get a free joint, keeping it anchored in place.
    door_obj = DoorObject(
        name="Door",
        friction=0.0,
        damping=0.1,
        lock=self.use_latch,
        joints=[],
    )
    self.mujoco_objects = OrderedDict([("Door", door_obj)])
    self.n_objects = len(self.mujoco_objects)

    # Assemble everything into the task model and place the door.
    self.model = ManipulationTask(
        mujoco_arena=self.mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.mujoco_objects,
        visual_objects=None,
        initializer=self.placement_initializer,
    )
    self.model.place_objects()
def _load_model(self):
    """
    Loads an xml model, puts it in self.model

    Builds the table arena, positions the robot base, creates the cube
    (with the material returned by self._get_cube_material()), wires up the
    placement initializer, and assembles everything into a ManipulationTask.
    """
    SingleArmEnv._load_model(self)

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    cube_material = self._get_cube_material()
    self.cube = BoxObject(
        name="cube",
        size_min=(0.025, 0.025, 0.025),
        size_max=(0.025, 0.025, 0.025),
        rgba=[1, 0, 0, 1],
        material=cube_material,
    )

    # FIX: previously this called self.placement_initializer.reset()
    # unconditionally, raising AttributeError when no initializer was
    # supplied. Guard against None and fall back to a default
    # UniformRandomSampler, consistent with the sibling envs in this file.
    if self.placement_initializer is not None:
        self.placement_initializer.reset()
        self.placement_initializer.add_objects(self.cube)
    else:
        self.placement_initializer = UniformRandomSampler(
            name="ObjectSampler",
            mujoco_objects=self.cube,
            x_range=[-0.03, 0.03],
            y_range=[-0.03, 0.03],
            rotation=None,
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            reference_pos=self.table_offset,
            z_offset=0.01,
        )

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.cube,
    )
def _load_model(self):
    """Build the door task model: arena (custom agentview camera), robot, door."""
    super()._load_model()

    # Place the robot base at the table-appropriate offset.
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # Table-top workspace; the arena always sits at the zero origin.
    arena = TableArena(
        table_full_size=self.table_full_size,
        table_offset=self.table_offset,
    )
    arena.set_origin([0, 0, 0])

    # Modify default agentview camera
    arena.set_camera(
        camera_name="agentview",
        pos=[0.5986131746834771, -4.392035683362857e-09, 1.5903500240372423],
        quat=[0.6380177736282349, 0.3048497438430786, 0.30484986305236816, 0.6380177736282349],
    )

    # Door with configurable latch.
    self.door = DoorObject(
        name="Door",
        friction=0.0,
        damping=0.1,
        lock=self.use_latch,
    )

    # Reuse a caller-supplied placement initializer when given; otherwise
    # fall back to a narrow uniform sampler in front of the robot.
    if self.placement_initializer is None:
        self.placement_initializer = UniformRandomSampler(
            name="ObjectSampler",
            mujoco_objects=self.door,
            x_range=[0.07, 0.09],
            y_range=[-0.01, 0.01],
            rotation=(-np.pi / 2. - 0.25, -np.pi / 2.),
            rotation_axis='z',
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            reference_pos=self.table_offset,
        )
    else:
        self.placement_initializer.reset()
        self.placement_initializer.add_objects(self.door)

    # Final task: arena + robot model(s) + the door.
    self.model = ManipulationTask(
        mujoco_arena=arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.door,
    )
def _load_model(self):
    """Load the table arena and assemble the task from externally provided objects."""
    super()._load_model()
    self.mujoco_robot.set_base_xpos([0, 0, 0])

    # Table-top arena for the workspace.
    self.mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
    )
    if self.use_indicator_object:
        self.mujoco_arena.add_pos_indicator()

    # The sawyer robot has a pedestal; shift the arena so the table lines up with it.
    self.mujoco_arena.set_origin([0.45 + self.table_full_size[0] / 2, 0, 0])

    # Assemble arena, robot, and the objects/initializer set up elsewhere.
    # Note: this variant does not call place_objects() here.
    self.model = TableTopTask(
        self.mujoco_arena,
        self.mujoco_robot,
        self.mujoco_objects,
        self.object_initializer,
    )
def _load_model(self):
    """
    Loads an xml model, puts it in self.model

    Builds the table arena, registers the board ("plate") and peg objects
    selected by self.task_configs, creates one UniformRandomSampler per
    registered object inside a SequentialCompositeSampler, and assembles
    everything into a ManipulationTask.
    """
    super()._load_model()

    # Adjust base pose accordingly
    xpos = self.robots[0].robot_model.base_xpos_offset["table"](self.table_full_size[0])
    self.robots[0].robot_model.set_base_xpos(xpos)

    # load model for table top workspace
    mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )

    # Arena always gets set to zero origin
    mujoco_arena.set_origin([0, 0, 0])

    # Load hole object
    # register object with the corresponding option (objectClass, name, xrange, yrange)
    # Boards are pinned at the table center (x/y ranges of [0, 0]).
    if self.task_configs['board'] == 'GMC_assembly':
        self.register_object(my_object.GMC_Assembly_Object, 'plate', xrange=[0, 0], yrange=[0, 0])
    if self.task_configs['board'] == 'GMC_plate':
        self.register_object(my_object.GMC_Plate_Object, 'plate', xrange=[0, 0], yrange=[0, 0])
    if self.task_configs['board'] == 'Square_hole_16mm':
        self.register_object(my_object.square_hole_16mm, 'plate', xrange=[0, 0], yrange=[0, 0])

    # Load peg object
    # NOTE(review): the x-ranges below are written high-to-low ([-0.1, -0.13]);
    # presumably the sampler tolerates a reversed interval — verify against
    # the UniformRandomSampler implementation.
    if self.task_configs['peg'] == '16mm':
        self.register_object(my_object.Round_peg_16mm_Object, 'peg', xrange=[-0.1, -0.13], yrange=[0.3, 0.33])
    elif self.task_configs['peg'] == '12mm':
        self.register_object(my_object.Round_peg_12mm_Object, 'peg', xrange=[-0.1, -0.13], yrange=[0.3, 0.33])
    elif self.task_configs['peg'] == '9mm':
        raise NotImplementedError
    elif self.task_configs['peg'] == 'cylinder_16mm':
        # Primitive cylinder peg is registered manually rather than through
        # register_object: append to the same four bookkeeping lists.
        from robosuite.models.objects.primitive import CylinderObject
        self.peg = CylinderObject('peg', size=(0.007, 0.025))
        self.objects_of_interest.append(self.peg)
        self.objectsName_of_interest.append('peg')
        self.objectsXrange_of_interest.append([-0.1, -0.13])
        self.objectsYrange_of_interest.append([0.3, 0.33])

    # Create Sequential Sampler. The order is same as the order of register.
    # Create individual samplers per object
    self.placement_initializer = SequentialCompositeSampler(name="ObjectSampler")
    for obj_name, default_xy_range in zip(
            self.objectsName_of_interest,
            zip(self.objectsXrange_of_interest, self.objectsYrange_of_interest)):
        self.placement_initializer.append_sampler(
            sampler=UniformRandomSampler(
                name=f"{obj_name}Sampler",
                x_range=default_xy_range[0],
                y_range=default_xy_range[1],
                rotation=None,
                rotation_axis='z',
                ensure_object_boundary_in_range=True,
                ensure_valid_placement=True,
                reference_pos=self.table_offset,
                z_offset=0.01,
            ))

    # Add objects to the sampler
    for obj_to_put, obj_name in zip(self.objects_of_interest, self.objectsName_of_interest):
        self.placement_initializer.add_objects_to_sampler(
            sampler_name=f"{obj_name}Sampler", mujoco_objects=obj_to_put)

    # The 'hole' plate is appended AFTER the samplers are built, so it is not
    # sampled; its pose is fixed explicitly on the MJCF element instead.
    if self.task_configs['board'] == 'hole':
        self.plate = PlateWithHoleObject(name='plate')
        plate_obj = self.plate.get_obj()
        plate_obj.set("quat", "0 0 0 1")
        plate_obj.set("pos", "0 0 {}".format(self.table_offset[2]))
        self.objects_of_interest.append(self.plate)

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.objects_of_interest,
    )
class TwoArmLift(RobotEnv):
    """
    This class corresponds to the lifting task for two robot arms.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be either 2 single single-arm robots or 1 bimanual robot!

        env_configuration (str): Specifies how to position the robots within the environment. Can be either:

            :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x
                side of the table
            :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots next to each other on the -x side of the table
            :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single armed
                robots opposed from each others on the opposite +/-y sides of the table (Default option)

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate gripper models from gripper factory.
            Default is "default", which is the default grippers(s) associated with the robot(s) the 'robots'
            specification. None removes the gripper, and any other (valid) model overrides the default gripper.
            Should either be single str if same gripper type is to be used for all robots or else it should be a
            list of the same length as "robots" param

        gripper_visualizations (bool or list of bool): True if using gripper visualization. Useful for teleoperation.
            Should either be single bool if gripper visualization is to be used for all robots or else it should be
            a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        table_full_size (3-tuple): x, y, and z dimensions of the table.

        table_friction (3-tuple): the three mujoco friction parameters for the table.

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object (cube) information in the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every
            reset, else a UniformRandomSampler is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging.

        has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None' will
            result in the default angle being applied, which is useful as it can be dragged / panned by the user
            using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else, only calls
            sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if same name
            is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if same height is
            to be used for all cameras' frames or else it should be a list of the same length as "camera names"
            param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if same width is to
            be used for all cameras' frames or else it should be a list of the same length as "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length
            as "camera names" param.

    Raises:
        ValueError: [Invalid number of robots specified]
        ValueError: [Invalid env configuration]
        ValueError: [Invalid robots for specified env configuration]
    """

    def __init__(
        self,
        robots,
        env_configuration="single-arm-opposed",
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that correct number of robots are being inputted
        self.env_configuration = env_configuration
        self._check_robot_configuration(robots)

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                rotation=(-np.pi / 3, np.pi / 3),
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 3.0 is provided if the pot is lifted and is parallel within 30 deg to the table

        Un-normalized summed components if using reward shaping:

            - Reaching: in [0, 0.5], per-arm component that is proportional to the distance between each arm and
              its respective pot handle, and exactly 0.5 when grasping the handle
              - Note that the agent only gets the lifting reward when flipping no more than 30 degrees.
            - Grasping: in {0, 0.25}, binary per-arm component awarded if the gripper is grasping its correct handle
            - Lifting: in [0, 1.5], proportional to the pot's height above the table, and capped at a certain
              threshold

        Note that the final reward is normalized and scaled by reward_scale / 3.0 as well so that the max score is
        equal to reward_scale

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0

        # check if the pot is tilted more than 30 degrees
        mat = T.quat2mat(self._pot_quat)
        z_unit = [0, 0, 1]
        z_rotated = np.matmul(mat, z_unit)
        cos_z = np.dot(z_unit, z_rotated)
        cos_30 = np.cos(np.pi / 6)
        # direction_coef gates both the sparse and the lifting reward on tilt
        direction_coef = 1 if cos_z >= cos_30 else 0

        # check for goal completion: cube is higher than the table top above a margin
        if self._check_success():
            reward = 3.0 * direction_coef

        # use a shaping reward
        elif self.reward_shaping:
            # lifting reward
            pot_bottom_height = self.sim.data.site_xpos[
                self.pot_center_id][2] - self.pot.get_top_offset()[2]
            table_height = self.sim.data.site_xpos[self.table_top_id][2]
            elevation = pot_bottom_height - table_height
            r_lift = min(max(elevation - 0.05, 0), 0.15)
            reward += 10. * direction_coef * r_lift

            _gripper_0_to_handle = self._gripper_0_to_handle
            _gripper_1_to_handle = self._gripper_1_to_handle

            # gh stands for gripper-handle
            # When grippers are far away, tell them to be closer

            # Single bimanual robot setting
            if self.env_configuration == "bimanual":
                # Gripper 0 (left hand) pairs with handle 2; gripper 1 (right
                # hand) pairs with handle 1.
                _contacts_0_lf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper["left"].
                            important_geoms["left_finger"],
                            self.pot.handle_2_geoms()))) > 0
                _contacts_0_rf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper["left"].
                            important_geoms["right_finger"],
                            self.pot.handle_2_geoms()))) > 0
                _contacts_1_lf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper["right"].
                            important_geoms["left_finger"],
                            self.pot.handle_1_geoms()))) > 0
                _contacts_1_rf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper["right"].
                            important_geoms["right_finger"],
                            self.pot.handle_1_geoms()))) > 0
            # Multi single arm setting
            else:
                _contacts_0_lf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper.
                            important_geoms["left_finger"],
                            self.pot.handle_2_geoms()))) > 0
                _contacts_0_rf = len(
                    list(
                        self.find_contacts(
                            self.robots[0].gripper.
                            important_geoms["right_finger"],
                            self.pot.handle_2_geoms()))) > 0
                _contacts_1_lf = len(
                    list(
                        self.find_contacts(
                            self.robots[1].gripper.
                            important_geoms["left_finger"],
                            self.pot.handle_1_geoms()))) > 0
                _contacts_1_rf = len(
                    list(
                        self.find_contacts(
                            self.robots[1].gripper.
                            important_geoms["right_finger"],
                            self.pot.handle_1_geoms()))) > 0

            _g0h_dist = np.linalg.norm(_gripper_0_to_handle)
            _g1h_dist = np.linalg.norm(_gripper_1_to_handle)

            # Grasping reward (arm 0: both fingers touching its handle)
            if _contacts_0_lf and _contacts_0_rf:
                reward += 0.25
            # Reaching reward
            reward += 0.5 * (1 - np.tanh(10.0 * _g0h_dist))

            # Grasping reward (arm 1)
            if _contacts_1_lf and _contacts_1_rf:
                reward += 0.25
            # Reaching reward
            reward += 0.5 * (1 - np.tanh(10.0 * _g1h_dist))

        if self.reward_scale is not None:
            reward *= self.reward_scale / 3.0
        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            xpos = self.robots[0].robot_model.base_xpos_offset["table"](
                self.table_full_size[0])
            self.robots[0].robot_model.set_base_xpos(xpos)
        else:
            if self.env_configuration == "single-arm-opposed":
                # Set up robots facing towards each other by rotating them from their default position
                for robot, rotation in zip(self.robots,
                                           (np.pi / 2, -np.pi / 2)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    rot = np.array((0, 0, rotation))
                    xpos = T.euler2mat(rot) @ np.array(xpos)
                    robot.robot_model.set_base_xpos(xpos)
                    robot.robot_model.set_base_ori(rot)
            else:  # "single-arm-parallel" configuration setting
                # Set up robots parallel to each other but offset from the center
                for robot, offset in zip(self.robots, (-0.25, 0.25)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    xpos = np.array(xpos) + np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        self.pot = PotWithHandlesObject(name="pot")
        self.mujoco_objects = OrderedDict([("pot", self.pot)])

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.pot_body_id = self.sim.model.body_name2id("pot")
        self.handle_1_site_id = self.sim.model.site_name2id("pot_handle_1")
        # NOTE(review): handle 0 intentionally-looking maps to the
        # "pot_handle_2" site (and handle 1 to "pot_handle_1"); the reward
        # pairing above is consistent with this mapping — verify against the
        # pot model before changing.
        self.handle_0_site_id = self.sim.model.site_name2id("pot_handle_2")
        self.table_top_id = self.sim.model.site_name2id("table_top")
        self.pot_center_id = self.sim.model.site_name2id("pot_center")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:
            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                self.sim.data.set_joint_qpos(
                    obj_name + "_jnt0",
                    np.concatenate(
                        [np.array(obj_pos[i]),
                         np.array(obj_quat[i])]))

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix
            if self.env_configuration == "bimanual":
                pr0 = self.robots[0].robot_model.naming_prefix + "left_"
                pr1 = self.robots[0].robot_model.naming_prefix + "right_"
            else:
                pr0 = self.robots[0].robot_model.naming_prefix
                pr1 = self.robots[1].robot_model.naming_prefix

            # position and rotation of object
            cube_pos = np.array(self.sim.data.body_xpos[self.pot_body_id])
            cube_quat = T.convert_quat(
                self.sim.data.body_xquat[self.pot_body_id], to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            di[pr0 + "eef_xpos"] = self._eef0_xpos
            di[pr1 + "eef_xpos"] = self._eef1_xpos
            di["handle_0_xpos"] = np.array(self._handle_0_xpos)
            di["handle_1_xpos"] = np.array(self._handle_1_xpos)
            di[pr0 + "gripper_to_handle"] = np.array(self._gripper_0_to_handle)
            di[pr1 + "gripper_to_handle"] = np.array(self._gripper_1_to_handle)

            # Flat concatenation of all object-centric features above
            di["object-state"] = np.concatenate([
                di["cube_pos"],
                di["cube_quat"],
                di[pr0 + "eef_xpos"],
                di[pr1 + "eef_xpos"],
                di["handle_0_xpos"],
                di["handle_1_xpos"],
                di[pr0 + "gripper_to_handle"],
                di[pr1 + "gripper_to_handle"],
            ])

        return di

    def _check_success(self):
        """
        Check if pot is successfully lifted

        Returns:
            bool: True if pot is lifted
        """
        pot_bottom_height = self.sim.data.site_xpos[
            self.pot_center_id][2] - self.pot.get_top_offset()[2]
        table_height = self.sim.data.site_xpos[self.table_top_id][2]

        # cube is higher than the table top above a margin
        return pot_bottom_height > table_height + 0.10

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env

        Raises:
            ValueError: if the robot count or type does not match the env configuration
        """
        # Normalize to a list so the checks below can count robots uniformly
        robots = robots if type(robots) == list or type(robots) == tuple else [
            robots
        ]
        if self.env_configuration == "single-arm-opposed" or self.env_configuration == "single-arm-parallel":
            # Specifically two robots should be inputted!
            is_bimanual = False
            if type(robots) is not list or len(robots) != 2:
                raise ValueError(
                    "Error: Exactly two single-armed robots should be inputted "
                    "for this task configuration!")
        elif self.env_configuration == "bimanual":
            is_bimanual = True
            # Specifically one robot should be inputted!
            if type(robots) is list and len(robots) != 1:
                raise ValueError(
                    "Error: Exactly one bimanual robot should be inputted "
                    "for this task configuration!")
        else:
            # This is an unknown env configuration, print error
            raise ValueError(
                "Error: Unknown environment configuration received. Only 'bimanual',"
                "'single-arm-parallel', and 'single-arm-opposed' are supported. Got: {}"
                .format(self.env_configuration))

        # Lastly, check to make sure all inputted robot names are of their correct type (bimanual / not bimanual)
        for robot in robots:
            if check_type(robot, RobotType.bimanual) != is_bimanual:
                raise ValueError(
                    "Error: For {} configuration, expected bimanual check to return {}; "
                    "instead, got {}.".format(
                        self.env_configuration, is_bimanual,
                        check_type(robot, RobotType.bimanual)))

    @property
    def _handle_0_xpos(self):
        """
        Grab the position of pot handle 0.

        Returns:
            np.array: (x,y,z) position of handle
        """
        return self.sim.data.site_xpos[self.handle_0_site_id]

    @property
    def _handle_1_xpos(self):
        """
        Grab the position of pot handle 1.

        Returns:
            np.array: (x,y,z) position of handle
        """
        return self.sim.data.site_xpos[self.handle_1_site_id]

    @property
    def _pot_quat(self):
        """
        Grab the orientation of the pot body.

        Returns:
            np.array: (x,y,z,w) quaternion of the pot body
        """
        return T.convert_quat(self.sim.data.body_xquat[self.pot_body_id],
                              to="xyzw")

    @property
    def _world_quat(self):
        """
        Grab the world orientation

        Returns:
            np.array: (x,y,z,w) world quaternion
        """
        return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw")

    @property
    def _eef0_xpos(self):
        """
        Grab the position of Robot 0's end effector.

        Returns:
            np.array: (x,y,z) position of EEF0
        """
        if self.env_configuration == "bimanual":
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id["left"]])
        else:
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id])

    @property
    def _eef1_xpos(self):
        """
        Grab the position of Robot 1's end effector.

        Returns:
            np.array: (x,y,z) position of EEF1
        """
        if self.env_configuration == "bimanual":
            return np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id["right"]])
        else:
            return np.array(
                self.sim.data.site_xpos[self.robots[1].eef_site_id])

    @property
    def _gripper_0_to_handle(self):
        """
        Calculate vector from gripper 0 to its pot handle.

        Returns:
            np.array: (dx,dy,dz) distance vector between handle and EEF0
        """
        return self._handle_0_xpos - self._eef0_xpos

    @property
    def _gripper_1_to_handle(self):
        """
        Calculate vector from gripper 1 to its pot handle.

        Returns:
            np.array: (dx,dy,dz) distance vector between handle and EEF1
        """
        return self._handle_1_xpos - self._eef1_xpos
class PandaLift(PandaEnv):
    """
    This class corresponds to the lifting task for the Panda robot arm.
    """
    # NOTE(review): the docstring above was previously placed *after*
    # ``dof = 8``, making it a discarded bare-string expression instead of
    # the class docstring. Moved it first so ``PandaLift.__doc__`` is set.

    dof = 8

    # Ranges used for domain randomization of table / cube properties.
    parameters_spec = {
        **PandaEnv.parameters_spec,
        'table_size_0': [0.7, 0.9],
        'table_size_1': [0.7, 0.9],
        'table_size_2': [0.7, 0.9],
        'table_friction_0': [0.4, 1.6],
        'table_friction_1': [0.0025, 0.0075],
        'table_friction_2': [0.00005, 0.00015],
        'boxobject_friction_0': [0.4, 1.6],
        # 'boxobject_friction_1': [0.0025, 0.0075],  # fixed this to zero
        'boxobject_friction_2': [0.00005, 0.00015],
        'boxobject_density_1000': [0.6, 1.4],
    }

    def reset_props(self,
                    table_size_0=0.8,
                    table_size_1=0.8,
                    table_size_2=0.8,
                    table_friction_0=1.0,
                    table_friction_1=0.005,
                    table_friction_2=0.0001,
                    boxobject_size_0=0.020,
                    boxobject_size_1=0.020,
                    boxobject_size_2=0.020,
                    boxobject_friction_0=1.0,
                    boxobject_friction_1=0.0,
                    boxobject_friction_2=0.0001,
                    boxobject_density_1000=0.1,
                    **kwargs):
        """
        Store the (possibly randomized) physical properties used on the next
        model load.

        Args mirror the keys of ``parameters_spec``; ``boxobject_density_1000``
        is scaled by 1000 before being stored as ``self.boxobject_density``.
        """
        self.table_full_size = (table_size_0, table_size_1, table_size_2)
        self.table_friction = (table_friction_0, table_friction_1,
                               table_friction_2)
        self.boxobject_size = (boxobject_size_0, boxobject_size_1,
                               boxobject_size_2)
        self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1,
                                   boxobject_friction_2)
        self.boxobject_density = boxobject_density_1000 * 1000.
        super().reset_props(**kwargs)

    def __init__(self,
                 use_object_obs=True,
                 reward_shaping=True,
                 placement_initializer=None,
                 object_obs_process=True,
                 **kwargs):
        """
        Args:
            use_object_obs (bool): if True, include object (cube) information
                in the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if
                provided, will be used to place objects on every reset, else a
                UniformRandomSampler is used by default.

            object_obs_process (bool): if True, process the object observation
                to get a task_state. Setting this to False is useful when some
                transformation (eg. noise) needs to be done to the raw object
                observation prior to the processing.
        """
        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # placeholder values for the first initialization; reset_props()
        # overwrites these on every parameter reset
        self.table_full_size = (0.8, 0.8, 0.8)
        self.table_friction = (1.0, 0.005, 0.0001)
        self.boxobject_size = (0.02, 0.02, 0.02)
        self.boxobject_friction = (1.0, 0.005, 0.0001)
        self.boxobject_density = 100.

        self.object_obs_process = object_obs_process

        super().__init__(gripper_visualization=True, **kwargs)

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        # in original robosuite, a simple domain randomization is included in
        # BoxObject implementation, and called here. We choose to discard that
        # implementation.
        cube = FullyFrictionalBoxObject(
            size=self.boxobject_size,
            friction=self.boxobject_friction,
            density=self.boxobject_density,
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components.

        A reference is typically an index or a list of indices that point to
        the corresponding elements in a flatten array, which is how MuJoCo
        stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions with a small random perturbation.
        # BUGFIX: the original used in-place ``init_pos += noise`` on the
        # array returned by ``self.mujoco_robot.init_qpos``, which (for a
        # NumPy array) mutates the robot model's stored init_qpos itself and
        # accumulates noise across resets. Use out-of-place addition instead.
        init_qpos = self.mujoco_robot.init_qpos
        init_pos = init_qpos + np.random.randn(init_qpos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:

            # reaching reward: decays with gripper-to-cube distance
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - cube_pos)
            reaching_reward = 1 - np.tanh(10.0 * dist)
            reward += reaching_reward

            # grasping reward: both fingers must be in contact with the cube
            touch_left_finger = False
            touch_right_finger = False
            for i in range(self.sim.data.ncon):
                c = self.sim.data.contact[i]
                # contact pairs are unordered, so test both geom orderings
                if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_left_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_right_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids:
                    touch_right_finger = True
            if touch_left_finger and touch_right_finger:
                reward += 0.25

        return reward

    def put_raw_object_obs(self, di):
        """Write raw cube / gripper observations into the obs dict ``di``."""
        di['cube_pos'] = np.array(self.sim.data.body_xpos[self.cube_body_id])
        di['cube_quat'] = convert_quat(np.array(
            self.sim.data.body_xquat[self.cube_body_id]),
                                       to="xyzw")
        di['gripper_site_pos'] = np.array(
            self.sim.data.site_xpos[self.eef_site_id])

    def process_object_obs(self, di):
        """Derive the flat ``task_state`` vector from the raw object obs."""
        gripper_to_cube = di['gripper_site_pos'] - di['cube_pos']
        task_state = np.concatenate(
            [di['cube_pos'], di['cube_quat'], gripper_to_cube])
        di['task_state'] = task_state

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            self.put_raw_object_obs(di)
            if self.object_obs_process:
                self.process_object_obs(di)

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(
                        contact.geom2) in self.gripper.contact_geoms()):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]

        # cube is higher than the table top above a margin
        return cube_height > table_height + 0.04

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here: green when close, red when far
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) # load model for table top workspace mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=self.table_offset, ) # Arena always gets set to zero origin mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest tex_attrib = { "type": "cube", } mat_attrib = { "texrepeat": "1 1", "specular": "0.4", "shininess": "0.1", } redwood = CustomMaterial( texture="WoodRed", tex_name="redwood", mat_name="redwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) greenwood = CustomMaterial( texture="WoodGreen", tex_name="greenwood", mat_name="greenwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) self.cubeA = BoxObject( name="cubeA", size_min=[0.02, 0.02, 0.02], size_max=[0.02, 0.02, 0.02], rgba=[1, 0, 0, 1], material=redwood, ) self.cubeB = BoxObject( name="cubeB", size_min=[0.025, 0.025, 0.025], size_max=[0.025, 0.025, 0.025], rgba=[0, 1, 0, 1], material=greenwood, ) cubes = [self.cubeA, self.cubeB] # Create placement initializer if self.placement_initializer is not None: self.placement_initializer.reset() self.placement_initializer.add_objects(cubes) else: self.placement_initializer = UniformRandomSampler( name="ObjectSampler", mujoco_objects=cubes, x_range=[-0.08, 0.08], y_range=[-0.08, 0.08], rotation=None, ensure_object_boundary_in_range=False, ensure_valid_placement=True, reference_pos=self.table_offset, z_offset=0.01, ) # task includes arena, robot, and objects of interest self.model = ManipulationTask( mujoco_arena=mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=cubes, )
class BaxterLift(BaxterEnv):
    """
    This class corresponds to the bimanual lifting task for the Baxter robot.
    """

    def __init__(self,
                 gripper_type_right="TwoFingerGripper",
                 gripper_type_left="LeftTwoFingerGripper",
                 table_full_size=(0.8, 0.8, 0.8),
                 table_friction=(1., 5e-3, 1e-4),
                 use_object_obs=True,
                 reward_shaping=True,
                 **kwargs):
        """
        Args:
            gripper_type_right (str): type of gripper used on the right hand.

            gripper_type_left (str): type of gripper used on the left hand.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_object_obs (bool): if True, include object (pot) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

        Inherits the Baxter environment; refer to other parameters described
        there.
        """
        # initialize the pot
        self.pot = PotWithHandlesObject()
        self.mujoco_objects = OrderedDict([("pot", self.pot)])

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # spawn the pot slightly behind the table center with a small random
        # yaw; the boundary check is deliberately disabled
        self.object_initializer = UniformRandomSampler(
            x_range=(-0.15, -0.04),
            y_range=(-0.015, 0.015),
            z_rotation=(-0.15 * np.pi, 0.15 * np.pi),
            ensure_object_boundary_in_range=False,
        )

        super().__init__(gripper_left=gripper_type_left,
                         gripper_right=gripper_type_right,
                         **kwargs)

    def _load_model(self):
        """
        Loads the arena and pot object.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.45 + self.table_full_size[0] / 2, 0, 0])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            self.object_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components.

        A reference is typically an index or a list of indices that point to
        the corresponding elements in a flattened array, which is how MuJoCo
        stores physical simulation data.
        """
        super()._get_reference()
        # NOTE(review): despite the name, this is the body id of the *pot*.
        self.cube_body_id = self.sim.model.body_name2id("pot")
        self.handle_1_site_id = self.sim.model.site_name2id("pot_handle_1")
        self.handle_2_site_id = self.sim.model.site_name2id("pot_handle_2")
        self.table_top_id = self.sim.model.site_name2id("table_top")
        self.pot_center_id = self.sim.model.site_name2id("pot_center")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        self.model.place_objects()

    def reward(self, action):
        """
        Reward function for the task.

        1. the agent only gets the lifting reward when flipping no more than
           30 degrees.
        2. the lifting reward is smoothed and ranged from 0 to 2, capped at
           2.0. the initial lifting reward is 0 when the pot is on the table;
           the agent gets the maximum 2.0 reward when the pot's height is
           above a threshold.
        3. the reaching reward is 0.5 when the left gripper touches the left
           handle, or when the right gripper touches the right handle. before
           the gripper geom touches the handle geom a distance-shaped term is
           used; once it touches we use 0.5.
        """
        reward = 0

        # pot bottom height relative to the table surface
        cube_height = self.sim.data.site_xpos[
            self.pot_center_id][2] - self.pot.get_top_offset()[2]
        table_height = self.sim.data.site_xpos[self.table_top_id][2]

        # check if the pot is tilted more than 30 degrees
        mat = T.quat2mat(self._pot_quat)
        z_unit = [0, 0, 1]
        z_rotated = np.matmul(mat, z_unit)
        cos_z = np.dot(z_unit, z_rotated)
        cos_30 = np.cos(np.pi / 6)
        direction_coef = 1 if cos_z >= cos_30 else 0

        # cube is higher than the table top above a margin
        if cube_height > table_height + 0.15:
            reward = 1.0 * direction_coef

        # use a shaping reward
        if self.reward_shaping:
            reward = 0

            # lifting reward: linear in elevation above a 5cm deadband,
            # capped at 0.2 before the 10x scaling
            elevation = cube_height - table_height
            r_lift = min(max(elevation - 0.05, 0), 0.2)
            reward += 10. * direction_coef * r_lift

            l_gripper_to_handle = self._l_gripper_to_handle
            r_gripper_to_handle = self._r_gripper_to_handle

            # gh stands for gripper-handle
            # When grippers are far away, tell them to be closer
            l_contacts = list(
                self.find_contacts(self.gripper_left.contact_geoms(),
                                   self.pot.handle_1_geoms()))
            r_contacts = list(
                self.find_contacts(self.gripper_right.contact_geoms(),
                                   self.pot.handle_2_geoms()))
            l_gh_dist = np.linalg.norm(l_gripper_to_handle)
            r_gh_dist = np.linalg.norm(r_gripper_to_handle)

            # full 0.5 once in contact, tanh-shaped approach reward otherwise
            if len(l_contacts) > 0:
                reward += 0.5
            else:
                reward += 0.5 * (1 - np.tanh(l_gh_dist))
            if len(r_contacts) > 0:
                reward += 0.5
            else:
                reward += 0.5 * (1 - np.tanh(r_gh_dist))

        return reward

    @property
    def _handle_1_xpos(self):
        """Returns the position of the first handle."""
        return self.sim.data.site_xpos[self.handle_1_site_id]

    @property
    def _handle_2_xpos(self):
        """Returns the position of the second handle."""
        return self.sim.data.site_xpos[self.handle_2_site_id]

    @property
    def _pot_quat(self):
        """Returns the orientation of the pot as an (x,y,z,w) quaternion."""
        return T.convert_quat(self.sim.data.body_xquat[self.cube_body_id],
                              to="xyzw")

    @property
    def _world_quat(self):
        """World quaternion (identity rotation, (x,y,z,w) convention)."""
        return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw")

    @property
    def _l_gripper_to_handle(self):
        """Returns vector from the left gripper to the handle."""
        return self._handle_1_xpos - self._l_eef_xpos

    @property
    def _r_gripper_to_handle(self):
        """Returns vector from the right gripper to the handle."""
        return self._handle_2_xpos - self._r_eef_xpos

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object (the pot; "cube" naming is
            # historical)
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            cube_quat = T.convert_quat(
                self.sim.data.body_xquat[self.cube_body_id], to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            di["l_eef_xpos"] = self._l_eef_xpos
            di["r_eef_xpos"] = self._r_eef_xpos
            di["handle_1_xpos"] = self._handle_1_xpos
            di["handle_2_xpos"] = self._handle_2_xpos
            di["l_gripper_to_handle"] = self._l_gripper_to_handle
            di["r_gripper_to_handle"] = self._r_gripper_to_handle

            di["object-state"] = np.concatenate([
                di["cube_pos"],
                di["cube_quat"],
                di["l_eef_xpos"],
                di["r_eef_xpos"],
                di["handle_1_xpos"],
                di["handle_2_xpos"],
                di["l_gripper_to_handle"],
                di["r_gripper_to_handle"],
            ])

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        contact_geoms = (self.gripper_right.contact_geoms() +
                         self.gripper_left.contact_geoms())
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(contact.geom1) in contact_geoms
                    or self.sim.model.geom_id2name(
                        contact.geom2) in contact_geoms):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task is successfully completed
        """
        # cube is higher than the table top above a margin
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]
        return cube_height > table_height + 0.10
def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Verify the correct robot has been loaded assert isinstance(self.robots[0], SingleArm), \ "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0])) # Adjust base pose accordingly xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) # load model for table top workspace self.mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=self.table_friction, table_offset=(0, 0, 0.8), ) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # Arena always gets set to zero origin self.mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest tex_attrib = { "type": "cube", } mat_attrib = { "texrepeat": "1 1", "specular": "0.4", "shininess": "0.1", } redwood = CustomMaterial( texture="WoodRed", tex_name="redwood", mat_name="redwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) greenwood = CustomMaterial( texture="WoodGreen", tex_name="greenwood", mat_name="greenwood_mat", tex_attrib=tex_attrib, mat_attrib=mat_attrib, ) cubeA = BoxObject( name="cubeA", size_min=[0.02, 0.02, 0.02], size_max=[0.02, 0.02, 0.02], rgba=[1, 0, 0, 1], material=redwood, ) cubeB = BoxObject( name="cubeB", size_min=[0.025, 0.025, 0.025], size_max=[0.025, 0.025, 0.025], rgba=[0, 1, 0, 1], material=greenwood, ) self.mujoco_objects = OrderedDict([("cubeA", cubeA), ("cubeB", cubeB)]) # task includes arena, robot, and objects of interest self.model = ManipulationTask( self.mujoco_arena, [robot.robot_model for robot in self.robots], self.mujoco_objects, initializer=self.placement_initializer, ) self.model.place_objects()
class Door(RobotEnv):
    """
    This class corresponds to the door opening task for a single robot arm.

    Args:
        robots (str or list of str): Specification for specific robot arm(s)
            to be instantiated within this env (e.g: "Sawyer" would generate
            one arm; ["Panda", "Panda", "Sawyer"] would generate three robot
            arms). Note: Must be a single single-arm robot!

        controller_configs (str or list of dict): If set, contains relevant
            controller parameters for creating a custom controller. Else, uses
            the default controller for this specific task. Should either be
            single dict if same controller is to be used for all robots or
            else it should be a list of the same length as "robots" param

        gripper_types (str or list of str): type of gripper, used to
            instantiate gripper models from gripper factory. Default is
            "default", which is the default gripper(s) associated with the
            robot(s) in the 'robots' specification. None removes the gripper,
            and any other (valid) model overrides the default gripper. Should
            either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots"
            param

        gripper_visualizations (bool or list of bool): True if using gripper
            visualization. Useful for teleoperation. Should either be single
            bool if gripper visualization is to be used for all robots or else
            it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the
            initialization noise parameters. The expected keys and
            corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise
                applied to each of a robot's given initial joint positions.
                Setting this value to `None` or 0.0 results in no noise being
                applied. If "gaussian" type of noise is applied then this
                magnitude scales the standard deviation applied; if "uniform"
                type of noise is applied then this magnitude sets the bounds
                of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian"
                or "uniform"

            Should either be single dict if same noise value is to be used for
            all robots or else it should be a list of the same length as
            "robots" param

            :Note: Specifying "default" will automatically use the default
                noise settings. Specifying None will automatically create the
                required dict with "magnitude" set to 0.0.

        use_latch (bool): if True, uses a spring-loaded handle and latch to
            "lock" the door closed initially. Otherwise, door is instantiated
            with a fixed handle

        use_camera_obs (bool): if True, every observation includes rendered
            image(s)

        use_object_obs (bool): if True, include object (door) information in
            the observation.

        reward_scale (None or float): Scales the normalized reward function by
            the amount specified. If None, environment reward remains
            unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided,
            will be used to place objects on every reset, else a
            UniformRandomSampler is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that
            is useful for debugging.

        has_renderer (bool): If true, render the simulation state in a viewer
            instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is
            True. Setting this value to 'None' will result in the default
            angle being applied, which is useful as it can be dragged / panned
            by the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in
            camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera.
            False otherwise.

        control_freq (float): how many control signals to receive in every
            second. This sets the amount of simulation time that passes
            between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore
            @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object
            upon a reset call, else, only calls sim.reset and resets all
            robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered.
            Should either be single str if same name is to be used for all
            cameras' rendering or else it should be a list of cameras to
            render.

            :Note: At least one camera must be specified if @use_camera_obs
                is True.

            :Note: To render all robots' cameras of a certain type (e.g.:
                "robotview" or "eye_in_hand"), use the convention
                "all-{name}" (e.g.: "all-robotview") to automatically render
                all camera images from each robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should
            either be single int if same height is to be used for all cameras'
            frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should
            either be single int if same width is to be used for all cameras'
            frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and
            RGB otherwise. Should either be single bool if same depth setting
            is to be used for all cameras or else it should be a list of the
            same length as "camera names" param.

    Raises:
        AssertionError: [Invalid number of robots specified]
    """

    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        use_latch=True,
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top (hardcoded since it's not an essential part of the environment)
        self.table_full_size = (0.8, 0.3, 0.05)
        self.table_offset = (-0.2, -0.35, 0.8)

        # reward configuration
        self.use_latch = use_latch
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer; the default samples the door at a
        # small offset with a yaw in (-pi/2 - 0.25, -pi/2)
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[0.07, 0.09],
                y_range=[-0.01, 0.01],
                ensure_object_boundary_in_range=False,
                rotation=(-np.pi / 2. - 0.25, -np.pi / 2.),
                rotation_axis='z',
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 1.0 is provided if the door is opened

        Un-normalized summed components if using reward shaping:

            - Reaching: in [0, 0.25], proportional to the distance between
              door handle and robot arm
            - Rotating: in [0, 0.25], proportional to angle rotated by door
              handle

              Note that this component is only relevant if the environment is
              using the locked door version

        Note that a successfully completed task (door opened) will return 1.0
        irregardless of whether the environment is using sparse or shaped
        rewards

        Note that the final reward is normalized and scaled by
        reward_scale / 1.0 as well so that the max score is equal to
        reward_scale

        Args:
            action (np.array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # else, we consider only the case if we're using shaped rewards
        elif self.reward_shaping:
            # Add reaching component
            dist = np.linalg.norm(self._gripper_to_handle)
            reaching_reward = 0.25 * (1 - np.tanh(10.0 * dist))
            reward += reaching_reward
            # Add rotating component if we're using a locked door
            if self.use_latch:
                handle_qpos = self.sim.data.qpos[self.handle_qpos_addr]
                reward += np.clip(0.25 * np.abs(handle_qpos / (0.5 * np.pi)),
                                  -0.25, 0.25)

        # Scale reward if requested
        if self.reward_scale is not None:
            reward *= self.reward_scale / 1.0

        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_offset=self.table_offset,
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        door = DoorObject(
            name="Door",
            friction=0.0,
            damping=0.1,
            lock=self.use_latch,
            joints=[],  # ensures that door object does not have a free joint
        )
        self.mujoco_objects = OrderedDict([("Door", door)])
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components.

        A reference is typically an index or a list of indices that point to
        the corresponding elements in a flatten array, which is how MuJoCo
        stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.object_body_ids = dict()
        self.object_body_ids["door"] = self.sim.model.body_name2id("door")
        self.object_body_ids["frame"] = self.sim.model.body_name2id("frame")
        self.object_body_ids["latch"] = self.sim.model.body_name2id("latch")
        self.door_handle_site_id = self.sim.model.site_name2id("door_handle")
        self.hinge_qpos_addr = self.sim.model.get_joint_qpos_addr("door_hinge")
        # the latch joint only exists in the locked-door variant
        if self.use_latch:
            self.handle_qpos_addr = self.sim.model.get_joint_qpos_addr(
                "latch_joint")

        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["left_finger"]
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["right_finger"]
        ]

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:
            # Sample from the placement initializer for the Door object
            door_pos, door_quat = self.model.place_objects()
            door_body_id = self.sim.model.body_name2id("Door")
            self.sim.model.body_pos[door_body_id] = door_pos[0]
            self.sim.model.body_quat[door_body_id] = door_quat[0]

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True.
                Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a
                rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth
                to be True. Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix
            pr = self.robots[0].robot_model.naming_prefix

            eef_pos = self._eef_xpos
            door_pos = np.array(
                self.sim.data.body_xpos[self.object_body_ids["door"]])
            handle_pos = self._handle_xpos
            hinge_qpos = np.array([self.sim.data.qpos[self.hinge_qpos_addr]])

            di["door_pos"] = door_pos
            di["handle_pos"] = handle_pos
            di[pr + "door_to_eef_pos"] = door_pos - eef_pos
            di[pr + "handle_to_eef_pos"] = handle_pos - eef_pos
            di["hinge_qpos"] = hinge_qpos

            di['object-state'] = np.concatenate([
                di["door_pos"], di["handle_pos"], di[pr + "door_to_eef_pos"],
                di[pr + "handle_to_eef_pos"], di["hinge_qpos"]
            ])

            # Also append handle qpos if we're using a locked door version with rotatable handle
            if self.use_latch:
                handle_qpos = np.array(
                    [self.sim.data.qpos[self.handle_qpos_addr]])
                di["handle_qpos"] = handle_qpos
                di['object-state'] = np.concatenate(
                    [di["object-state"], di["handle_qpos"]])

        return di

    def _check_success(self):
        """
        Check if door has been opened.

        Returns:
            bool: True if door has been opened
        """
        hinge_qpos = self.sim.data.qpos[self.hinge_qpos_addr]
        return hinge_qpos > 0.3

    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass
        implementations.
        """
        # color the gripper site appropriately based on distance to door handle
        if self.robots[0].gripper_visualization:
            # get distance to door handle
            dist = np.sum(
                np.square(self._handle_xpos - self.sim.data.get_site_xpos(
                    self.robots[0].gripper.visualization_sites["grip_site"])))

            # set RGBA for the EEF site here: green when close, red when far
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is
        acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        # a single robot name given as a plain str is accepted as-is
        if type(robots) is list:
            assert len(
                robots
            ) == 1, "Error: Only one robot should be inputted for this task!"

    @property
    def _eef_xpos(self):
        """
        Grabs End Effector position

        Returns:
            np.array: End effector (x,y,z)
        """
        return np.array(self.sim.data.site_xpos[self.robots[0].eef_site_id])

    @property
    def _handle_xpos(self):
        """
        Grabs the position of the door handle.

        Returns:
            np.array: Door handle (x,y,z)
        """
        return self.sim.data.site_xpos[self.door_handle_site_id]

    @property
    def _gripper_to_handle(self):
        """
        Calculates distance from the gripper to the door handle.

        Returns:
            np.array: (x,y,z) distance between handle and eef
        """
        return self._handle_xpos - self._eef_xpos
# 2. Add the Robot from robosuite.models.robots import Panda mujoco_robot = Panda() mujoco_robot.set_base_xpos([0, 0, 0]) # xyz mujoco_robot.set_base_ori([0, 0, 0]) # rpy world.merge(mujoco_robot) # takes xml/list of xmls #------------------------------------------------------------------------ # 3. Create Arena: table # Initialize the TableArena instance taht creates a table and floorplane from robosuite.models.arenas import TableArena #mujoco_arena = TableArena() #mujoco_arena = TableArena(table_full_size=(0.4, 0.8, 0.05)) #mujoco_arena = TableArena(table_full_size=(0.4, 0.8, 0.05),has_legs=False) mujoco_arena = TableArena(table_full_size=(0.4, 0.8, 0.05), table_offset=(0.4, 0, 0.1)) mujoco_arena.set_origin([0.4, 0, 0]) world.merge(mujoco_arena) #------------------------------------------------------------------------ # 4. Run simulation model = world.get_model(mode="mujoco_py") from mujoco_py import MjSim, MjViewer sim = MjSim(model) # view it viewer = MjViewer(sim) # creates viewing window viewer.vopt.geomgroup[0] = 0 # disable visualization of collision mesh
def _load_model(self):
    """
    Loads an xml model, puts it in self.model.

    Positions the robot base(s) according to the env configuration, builds
    the table arena, creates the pot object and its placement sampler, and
    assembles everything into a ManipulationTask stored in self.model.
    """
    super()._load_model()

    # Position the robot base(s) for the chosen two-arm configuration
    if self.env_configuration == "bimanual":
        # Single bimanual robot: just offset its base from the table edge
        base_pos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(base_pos)
    elif self.env_configuration == "single-arm-opposed":
        # Set up robots facing towards each other by rotating them from
        # their default position: +/- 90 degrees about z, with the table
        # offset rotated into each robot's frame
        for robot, angle in zip(self.robots, (np.pi / 2, -np.pi / 2)):
            table_offset_pos = robot.robot_model.base_xpos_offset["table"](
                self.table_full_size[0])
            euler = np.array((0, 0, angle))
            rotated_pos = T.euler2mat(euler) @ np.array(table_offset_pos)
            robot.robot_model.set_base_xpos(rotated_pos)
            robot.robot_model.set_base_ori(euler)
    else:  # "single-arm-parallel" configuration setting
        # Set up robots parallel to each other but offset from the center
        for robot, y_shift in zip(self.robots, (-0.25, 0.25)):
            table_offset_pos = robot.robot_model.base_xpos_offset["table"](
                self.table_full_size[0])
            shifted_pos = np.array(table_offset_pos) + np.array((0, y_shift, 0))
            robot.robot_model.set_base_xpos(shifted_pos)

    # load model for table top workspace; arena always gets set to zero origin
    mujoco_arena = TableArena(
        table_full_size=self.table_full_size,
        table_friction=self.table_friction,
        table_offset=self.table_offset,
    )
    mujoco_arena.set_origin([0, 0, 0])

    # initialize objects of interest
    self.pot = PotWithHandlesObject(name="pot")

    # Reuse a caller-supplied placement initializer if there is one,
    # otherwise build the default uniform sampler over a small table patch
    if self.placement_initializer is None:
        self.placement_initializer = UniformRandomSampler(
            name="ObjectSampler",
            mujoco_objects=self.pot,
            x_range=[-0.03, 0.03],
            y_range=[-0.03, 0.03],
            ensure_object_boundary_in_range=False,
            ensure_valid_placement=True,
            reference_pos=self.table_offset,
            rotation=(np.pi + -np.pi / 3, np.pi + np.pi / 3),
        )
    else:
        self.placement_initializer.reset()
        self.placement_initializer.add_objects(self.pot)

    # task includes arena, robot, and objects of interest
    self.model = ManipulationTask(
        mujoco_arena=mujoco_arena,
        mujoco_robots=[robot.robot_model for robot in self.robots],
        mujoco_objects=self.pot,
    )
from robosuite.models.robots import Panda mujoco_robot = Panda() from robosuite.models.grippers import gripper_factory gripper = gripper_factory('PandaGripper') # gripper.hide_visualization() # this doesnt work mujoco_robot.add_gripper(gripper) mujoco_robot.set_base_xpos([0, 0, 0]) world.merge(mujoco_robot) from robosuite.models.arenas import TableArena mujoco_arena = TableArena() mujoco_arena.set_origin([0.8, 0, 0]) world.merge(mujoco_arena) from robosuite.models.objects import BallObject from robosuite.utils.mjcf_utils import new_joint sphere = BallObject(name="sphere", size=[0.04], rgba=[0, 0.5, 0.5, 1]).get_obj() sphere.set('pos', '1.0 0 1.0') world.worldbody.append(sphere) model = world.get_model(mode="mujoco_py") from mujoco_py import MjSim, MjViewer
class FetchPush(RobotEnv):
    """
    This class corresponds to the fetch/push task for a single robot arm:
    the robot must push a red cube to a (blue) goal cube on the table.
    Many similarities with the lifting task.

    Args:
        robots (str or list of str): Specification for the specific robot arm
            to be instantiated within this env (e.g: "Sawyer" would generate
            one arm). Note: Must be a single single-arm robot!

        controller_configs (str or list of dict): If set, contains relevant
            controller parameters for creating a custom controller. Else, uses
            the default controller for this specific task.

        gripper_types (str or list of str): type of gripper, used to
            instantiate gripper models from gripper factory. "default" uses
            the default gripper associated with the robot; None removes the
            gripper; any other (valid) model overrides the default gripper.

        gripper_visualizations (bool or list of bool): True if using gripper
            visualization. Useful for teleoperation.

        initialization_noise (dict or list of dict): Dict containing the
            initialization noise parameters ('magnitude' scale factor and
            'type', either "gaussian" or "uniform"). "default" uses the
            default noise settings; None applies no noise.

        table_full_size (3-tuple): x, y, and z dimensions of the table.

        table_friction (3-tuple): the three mujoco friction parameters for
            the table.

        use_camera_obs (bool): if True, every observation includes rendered
            image(s).

        use_object_obs (bool): if True, include object (cube) information in
            the observation.

        reward_scale (None or float): Scales the normalized reward function
            by the amount specified. If None, environment reward remains
            unnormalized.

        reward_shaping (bool): if True, use dense rewards.

        distance_threshold (float): cube-to-goal distance below which the
            task counts as solved.

        placement_initializer (ObjectPositionSampler instance): if provided,
            will be used to place objects on every reset, else a
            UniformRandomSampler is used by default.

        use_indicator_object (bool): if True, sets up an indicator object
            that is useful for debugging.

        has_renderer (bool): If true, render the simulation state in a viewer
            instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering.

        render_camera (str): Name of camera to render if `has_renderer` is
            True. 'None' results in the default (draggable) angle.

        render_collision_mesh (bool): True if rendering collision meshes in
            camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera.
            False otherwise.

        control_freq (float): how many control signals to receive in every
            second.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment
            (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object
            upon a reset call, else only calls sim.reset and resets all
            robosuite-internal variables.

        camera_names (str or list of str): name of camera(s) to be rendered.
            At least one camera must be specified if @use_camera_obs is True.
            Use "all-{name}" to render all robots' cameras of a given type.

        camera_heights (int or list of int): height of camera frame.

        camera_widths (int or list of int): width of camera frame.

        camera_depths (bool or list of bool): True if rendering RGB-D, and
            RGB otherwise.

    Raises:
        AssertionError: [Invalid number of robots specified]
    """

    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(5e-3, 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        distance_threshold=0.06,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping
        self.distance_threshold = distance_threshold

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.22, 0.22],
                y_range=[-0.22, 0.22],
                ensure_object_boundary_in_range=False,
                rotation=None,
                z_offset=0.002,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def _distance(self, goal_a, goal_b):
        """Euclidean distance between two equally-shaped position arrays."""
        assert goal_a.shape == goal_b.shape
        return np.linalg.norm(goal_a - goal_b)

    def reward(self, action=None):
        """
        Reward function for the task.

        Dense (reward_shaping=True): success bonus (2.25) + reaching term
        + contact bonus (0.5) + pushing term. Sparse: 0 on success, -1 else.

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        reward = 0.0
        cube_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.body_xpos[self.goal_cube_body_id]
        gripper_site_pos = self.sim.data.site_xpos[self.robots[0].eef_site_id]
        cube_to_goal_dist = self._distance(cube_pos, goal_pos)
        gripper_to_cube_dist = self._distance(gripper_site_pos, cube_pos)

        if self.reward_shaping:
            # Return large reward if cube has been moved to goal
            if self._check_success():
                reward += 2.25
            # Reaching reward: saturates as the eef approaches the cube
            reward += 1 - np.tanh(10.0 * gripper_to_cube_dist)
            if self._is_gripper_touching_cube():
                reward += 0.5  # Reward for touching cube
            # Reward for pushing cube toward the goal
            reward += 1.5 - 1.5 * np.tanh(10.0 * cube_to_goal_dist)
        else:
            # Sparse: -1 until the cube reaches the goal, then 0
            reward += -float(not self._check_success())
        return reward

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        redwood = CustomMaterial(
            texture="WoodRed",
            tex_name="redwood",
            mat_name="redwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        cube = BoxObject(
            name="cube",
            size_min=[0.020, 0.020, 0.020],
            size_max=[0.022, 0.022, 0.022],
            rgba=[1, 0, 0, 1],
            material=redwood,
            friction=[0.5, 5e-3, 1e-4])
        goal_cube = BoxObject(
            name="goal_cube",
            size_min=[0.005, 0.005, 0.005],
            size_max=[0.005, 0.005, 0.005],
            rgba=[0, 0, 1, 1],
            density=11342,  # Density of lead such that the goal won't be moved
        )
        self.mujoco_objects = OrderedDict([("cube", cube),
                                           ("goal_cube", goal_cube)])
        self.n_objects = len(self.mujoco_objects)

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=self.mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.mujoco_objects,
            visual_objects=None,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.goal_cube_body_id = self.sim.model.body_name2id("goal_cube")

        if self.robots[0].gripper_type == 'UltrasoundProbeGripper':
            self.probe_geom_id = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["probe"]
            ]
        elif self.robots[0].has_gripper:
            self.l_finger_geom_ids = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["left_finger"]
            ]
            self.r_finger_geom_ids = [
                self.sim.model.geom_name2id(x)
                for x in self.robots[0].gripper.important_geoms["right_finger"]
            ]

        self.cube_geom_id = self.sim.model.geom_name2id("cube")
        self.goal_cube_geom_id = self.sim.model.geom_name2id("goal_cube")
        self.table_geom_id = self.sim.model.geom_name2id("table_collision")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:
            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions via their free joints
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                self.sim.data.set_joint_qpos(
                    obj_name + "_jnt0",
                    np.concatenate(
                        [np.array(obj_pos[i]),
                         np.array(obj_quat[i])]))

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True.
                Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True.
                Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth
                to be True. Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # Get robot prefix
        pr = self.robots[0].robot_model.naming_prefix

        if self.robots[0].has_gripper:
            # Checking if the UltrasoundProbeGripper is used
            if self.robots[0].gripper.dof == 0:
                # Remove unused keys (no joints in gripper)
                di.pop('robot0_gripper_qpos', None)
                di.pop('robot0_gripper_qvel', None)

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            goal_pos = np.array(
                self.sim.data.body_xpos[self.goal_cube_body_id])
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            di["cube_pos"] = cube_pos
            di["goal_pos"] = goal_pos
            di[pr + "cube_to_goal"] = cube_pos - goal_pos

            cube_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cube_body_id]),
                                     to="xyzw")

            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id])
            di[pr + "gripper_to_cube"] = gripper_site_pos - cube_pos

            # Used for GymWrapper observations (Robot state will also default be added e.g. eef position)
            # BUGFIX: the last entry was `gripper_to_cube` duplicated;
            # the computed cube_to_goal vector was never included.
            di["object-state"] = np.concatenate([
                cube_pos,
                cube_quat,
                goal_pos,
                di[pr + "gripper_to_cube"],
                di[pr + "cube_to_goal"],
            ])

        return di

    def _check_success(self):
        """
        Check if cube has been pushed to goal.

        Returns:
            bool: True if cube is within distance_threshold of the goal
        """
        cube_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.body_xpos[self.goal_cube_body_id]
        cube_to_goal_dist = self._distance(cube_pos, goal_pos)
        return cube_to_goal_dist < self.distance_threshold

    def _is_gripper_touching_geom(self, geom_id):
        """
        Check if any gripper contact geom is touching the geom with @geom_id.

        Args:
            geom_id (int): MuJoCo geom id to test contact against

        Returns:
            bool: True if contact
        """
        target_name = self.sim.model.geom_id2name(geom_id)
        gripper_geoms = self.robots[0].gripper.contact_geoms
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            name1 = self.sim.model.geom_id2name(contact.geom1)
            name2 = self.sim.model.geom_id2name(contact.geom2)
            if (name1 in gripper_geoms and name2 == target_name) or \
                    (name2 in gripper_geoms and name1 == target_name):
                return True
        return False

    def _is_gripper_touching_cube(self):
        """
        Check if the gripper is in contact with the cube

        Returns:
            bool: True if contact
        """
        return self._is_gripper_touching_geom(self.cube_geom_id)

    def _is_gripper_touching_table(self):
        """
        Check if the gripper is in contact with the tabletop

        Returns:
            bool: True if contact
        """
        return self._is_gripper_touching_geom(self.table_geom_id)

    def _post_action(self, action):
        """
        In addition to super method, add additional info if requested

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            3-tuple:

                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) info about current env step
        """
        # NOTE: an experimental early-termination condition (robot moving
        # away from the cube) was removed here as dead code.
        reward, done, info = super()._post_action(action)
        return reward, done, info

    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to cube
        if self.robots[0].gripper_visualization:
            # get distance to cube (squared Euclidean distance)
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos(
                              self.robots[0].gripper.
                              visualization_sites["grip_site"])))

            # set RGBA for the EEF site here: green when close, red when far
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        if type(robots) is list:
            assert len(
                robots
            ) == 1, "Error: Only one robot should be inputted for this task!"
class SawyerPrimitivePick(SawyerEnv):
    """
    This class corresponds to the lifting task for the Sawyer robot arm.
    """

    def __init__(
        self,
        instructive=0.0,
        decay=0.0,
        random_arm_init=None,
        gripper_type="TwoFingerGripper",
        table_full_size=(0.8, 0.8, 0.6),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:
            instructive (float): base probability of an "instructive" reset
                that starts the episode with the gripper at the cube
                (decays with the number of resets, see `decay`).

            decay (float): decay parameter for the instructive-reset
                probability: chance = instructive / (1 + decay * #resets).

            random_arm_init (None or 2-list): if given, [low, high] range used
                to randomize the initial end-effector position via IK.

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided,
                will be used to place objects on every reset, else a
                UniformRandomSampler is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object
                that is useful for debugging.

            has_renderer (bool): If true, render the simulation state in a
                viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment
                (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.random_arm_init = random_arm_init
        self.instructive = instructive
        self.instructive_counter = 0  # number of resets so far (drives decay)
        self.eval_mode = False
        self.decay = decay

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer; default ranges are zero-width, i.e.
        # the cube is always placed at the same spot
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.00, 0.00],
                y_range=[-0.00, 0.00],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal
        # dim: 3, 2, 3, 3
        # low = -np.ones(11) * np.inf
        # high = np.ones(11) * np.inf
        # self.observation_space = spaces.Box(low=low, high=high)

        # IK controller used to compute randomized initial arm configurations
        self.controller = SawyerIKController(
            bullet_data_path=os.path.join(robosuite.models.assets_root,
                                          "bullet_data"),
            robot_jpos_getter=self._robot_jpos_getter,
        )

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest (fixed-size cube)
        cube = BoxObject(
            # size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            # size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            size_min=[0.015, 0.015, 0.015],
            size_max=[0.015, 0.015, 0.015],
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        # Deliberately skips SawyerEnv._reset_internal and calls the
        # grandparent version, so arm initialization can be done here instead.
        super(SawyerEnv, self)._reset_internal()
        self.model.place_objects()

        if self.random_arm_init:
            # random initialization of arm: sample a target eef position and
            # solve IK for the corresponding joint configuration
            constant_quat = np.array(
                [-0.01704371, -0.99972409, 0.00199679, -0.01603944])
            target_position = np.array([
                0.5 + np.random.uniform(self.random_arm_init[0],
                                        self.random_arm_init[1]),
                np.random.uniform(self.random_arm_init[0],
                                  self.random_arm_init[1]),
                self.table_full_size[2] + 0.15211762
            ])
            self.controller.sync_ik_robot(self._robot_jpos_getter(),
                                          simulate=True)
            joint_list = self.controller.inverse_kinematics(
                target_position, constant_quat)
            init_pos = np.array(joint_list)
        else:
            # default robosuite init (plus small gaussian joint noise)
            init_pos = np.array(
                [-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628])
            init_pos += np.random.randn(init_pos.shape[0]) * 0.02

        self.sim.data.qpos[self._ref_joint_pos_indexes] = init_pos
        self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = np.array(
            [0.0115, -0.0115])  # Open
        self.sim.forward()

        # NOTE(review): hard-coded qpos indices below presumably address the
        # cube's free joint (x, y at [10:12], z at [13]) — confirm against the
        # model's joint layout before changing anything here.
        self.sim.data.qpos[10:12] = self.sim.data.site_xpos[
            self.eef_site_id][:2]

        # decay rate (1 / (1 + decay_param * #resets))
        chance = self.instructive * (
            1 / (1 + self.decay * self.instructive_counter))
        if np.random.uniform() < chance:  # and not self.eval_mode:
            # instructive reset: teleport the cube into the (closed) gripper
            self.sim.data.qpos[13] = self.sim.data.site_xpos[
                self.eef_site_id][2]
            self.sim.data.qpos[self._ref_gripper_joint_pos_indexes] = np.array(
                [-0.0, -0.0])
            #np.array([-0.21021952, -0.00024167])  # gripped
            self.instructive_counter = self.instructive_counter + 1

        # goal: lift the cube 7.5 cm above its reset position
        cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
        self.goal = cube_pos + np.array((0, 0, 0.075))

    def _robot_jpos_getter(self):
        # fixed nominal joint configuration handed to the IK controller
        return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161])

    def reward(self, action=None):
        """
        Reward function for the task. The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        # NOTE(review): despite the docstring above, the implementation
        # delegates entirely to compute_reward (sparse -1/100 scheme).
        cube_height = self.sim.data.body_xpos[self.cube_body_id]
        return self.compute_reward(cube_height, self.goal)

    # for goalenv wrapper
    def compute_reward(self, achieved_goal, desired_goal, info=None):
        # -1.0 while the cube is below the goal height, 100.0 once above it
        reward = -1.0
        if achieved_goal[2] > desired_goal[2]:
            reward = 100.0
        return reward

    # for goalenv wrapper
    def get_goalenv_dict(self, obs_dict):
        """Repackage a flat observation dict into the GoalEnv convention
        (observation / desired_goal / achieved_goal)."""
        # using only object-state and robot-state
        ob_lst = []
        di = {}
        for key in obs_dict:
            if key in ["robot-state", "object-state"]:
                ob_lst.append(obs_dict[key])

        di['observation'] = np.concatenate(ob_lst)
        di['desired_goal'] = self.goal
        # NOTE(review): object-state is [gripper_to_cube, cube_pos] (see
        # _get_observation), so [0:3] is the gripper-to-cube vector, not the
        # cube position — verify this is the intended achieved_goal.
        di['achieved_goal'] = obs_dict['object-state'][0:3]
        return di

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            cube_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cube_body_id]),
                                     to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_cube"] = gripper_site_pos - cube_pos

            # di["object-state"] = np.concatenate(
            #     [cube_pos, cube_quat, di["gripper_to_cube"]]
            # )
            di["object-state"] = np.concatenate(
                [di["gripper_to_cube"], cube_pos])

        return di

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube (squared Euclidean distance)
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here: green when close, red when far
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
class BaxterReach(BaxterEnv):
    """Two-arm reaching task for the Baxter robot: both end effectors are
    driven toward a goal sampled on the table, with the goal stored in the
    position-indicator object."""

    def __init__(self,
                 table_full_size=(0.8, 0.8, 0.8),
                 table_friction=(1., 5e-3, 1e-4),
                 use_object_obs=True,
                 **kwargs):
        """
        Args:
            table_full_size (3-tuple): x, y, and z dimensions of the table.
            table_friction (3-tuple): the three mujoco friction parameters for
                the table.
            use_object_obs (bool): if True, include object (pot) information in
                the observation.
            reward_shaping (bool): if True, use dense rewards.
        Inherits the Baxter environment; refer to other parameters described there.
        """
        # no manipulable objects in this task
        self.mujoco_objects = OrderedDict()

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        # NOTE(review): the use_object_obs parameter is accepted but ignored —
        # this is hard-coded to False; confirm whether that is intentional.
        self.use_object_obs = False
        self.object_initializer = None

        # the indicator object is forced on: the goal property below is backed
        # by the indicator's qpos entries
        super().__init__(use_indicator_object=True,
                         gripper_left="LeftTwoFingerGripper",
                         gripper_right="TwoFingerGripper",
                         **kwargs)

    def _load_model(self):
        """
        Loads the arena and pot object.
        NOTE(review): no pot is actually loaded here — mujoco_objects is empty;
        the docstring appears stale.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.45 + self.table_full_size[0] / 2, 0, 0])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            self.object_initializer,
        )

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements in
        a flattened array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.table_top_id = self.sim.model.site_name2id("table_top")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        # sample a goal uniformly in a 60x60 cm square on the table surface
        goal = np.random.uniform(-0.3, 0.3, size=3)
        goal[2] = 0
        goal += self.model.table_top_offset
        self.goal = goal

    @property
    def goal(self):
        # the goal lives in the indicator object's free-joint position qpos
        return self.sim.data.qpos[self._ref_indicator_pos_low:self.
                                  _ref_indicator_pos_low + 3]

    @goal.setter
    def goal(self, new_goal):
        # setting the goal moves the visual indicator to the new position
        self.move_indicator(new_goal)

    def get_dist(self):
        """Sum of the two end effectors' Euclidean distances to the goal."""
        goal = self.goal
        return np.linalg.norm(self._l_eef_xpos -
                              goal) + np.linalg.norm(self._r_eef_xpos - goal)

    def reward(self, action):
        """
        Reward function for the task: negative summed distance of both end
        effectors to the goal (dense).
        """
        dist = self.get_dist()
        return -dist

    @property
    def _world_quat(self):
        """World quaternion."""
        return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw")

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].
        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()
        # NOTE(review): object-state (the goal position) is written
        # unconditionally here, regardless of use_object_obs.
        di['object-state'] = self.goal.copy()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        contact_geoms = (self.gripper_right.contact_geoms() +
                         self.gripper_left.contact_geoms())
        # scan only the active contacts in the MuJoCo contact buffer
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(contact.geom1) in contact_geoms
                    or self.sim.model.geom_id2name(
                        contact.geom2) in contact_geoms):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task is successfully completed (summed two-arm
        distance to the goal under 0.2 m).
        """
        dist = self.get_dist()
        # if dist < 1:
        #     print('dist:', dist)
        if dist < 0.2:
            return True
        return False
class SawyerReach(SawyerEnv):
    """
    A primitive reaching task for the Sawyer robot arm: the gripper must move
    to a marker goal sampled around the end effector's starting position.
    """

    def __init__(
        self,
        gripper_type="TwoFingerGripper",
        lower_goal_range=(-0.1, -0.1, -0.1),  # FIX: tuple, not mutable list default
        upper_goal_range=(0.1, 0.1, 0.2),     # FIX: tuple, not mutable list default
        seed=False,
        random_arm_init=None,  # please pass in [low, high]
        table_full_size=(0.8, 0.8, 0.6),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=False,
        use_object_obs=True,
        reward_shaping=True,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:
            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.
            lower_goal_range / upper_goal_range (3-sequence): per-axis bounds
                of the goal offset sampled around the initial gripper position.
            seed (bool): if True, use a fixed arm start and goal offset
                instead of random sampling.
            random_arm_init (2-sequence or None): [low, high] range for the
                random initial arm placement; None uses the default init pose.
            table_full_size (3-tuple): x, y, and z dimensions of the table.
            table_friction (3-tuple): the three mujoco friction parameters for
                the table.
            use_camera_obs (bool): if True, every observation includes a
                rendered image.
            use_object_obs (bool): if True, include object (marker) information
                in the observation.
            reward_shaping (bool): if True, use dense rewards.
            placement_initializer (ObjectPositionSampler instance): if provided,
                will be used to place objects on every reset, else a
                UniformRandomSampler is used by default.
            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.
            use_indicator_object (bool): if True, sets up an indicator object
                that is useful for debugging.
            has_renderer (bool): If true, render the simulation state in a
                viewer instead of headless mode.
            has_offscreen_renderer (bool): True if using off-screen rendering.
            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.
            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.
            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.
            horizon (int): Every episode lasts for exactly @horizon timesteps.
            ignore_done (bool): True if never terminating the environment
                (ignore @horizon).
            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.
            camera_height (int): height of camera frame.
            camera_width (int): width of camera frame.
            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.upper_goal_range = upper_goal_range
        self.lower_goal_range = lower_goal_range
        self.random_arm_init = random_arm_init
        self.seed = seed
        # success radius used by compute_reward, in metres
        self.distance_threshold = 0.015

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        self.placement_initializer = placement_initializer

        # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal
        # dim: 3, 2, 3, 3
        # low = -np.ones(11) * np.inf
        # high = np.ones(11) * np.inf
        # self.observation_space = spaces.Box(low=low, high=high)

        # IK controller used to place the arm at a sampled start pose on reset
        self.controller = SawyerIKController(
            bullet_data_path=os.path.join(robosuite.models.assets_root,
                                          "bullet_data"),
            robot_jpos_getter=self._robot_jpos_getter,
        )

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # no physical objects: the reach goal is a virtual point, not a body
        self.mujoco_objects = OrderedDict([])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements in
        a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        # self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        # self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations, optionally placing the arm
        at a (possibly random) start pose via inverse kinematics.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        if self.random_arm_init is not None:
            if self.seed:
                # deterministic start pose for reproducible runs
                constant_quat = np.array(
                    [-0.01704371, -0.99972409, 0.00199679, -0.01603944])
                target_position = np.array([
                    0.5 + np.random.uniform(0.0, 0.0),
                    np.random.uniform(0.0, 0.0),
                    # self.table_full_size[2] + 0.15211762])
                    0.8 + 0.20211762
                ])
            else:
                # random initialization of arm
                constant_quat = np.array(
                    [-0.01704371, -0.99972409, 0.00199679, -0.01603944])
                target_position = np.array([
                    0.5 + np.random.uniform(self.random_arm_init[0],
                                            self.random_arm_init[1]),
                    np.random.uniform(self.random_arm_init[0],
                                      self.random_arm_init[1]),
                    self.table_full_size[2] + 0.20211762
                ])

            # solve IK for the sampled end-effector pose
            self.controller.sync_ik_robot(self._robot_jpos_getter(),
                                          simulate=True)
            joint_list = self.controller.inverse_kinematics(
                target_position, constant_quat)
            init_pos = np.array(joint_list)
        else:
            # default robosuite init, with small gaussian joint noise
            init_pos = np.array(
                [-0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628])
            init_pos += np.random.randn(init_pos.shape[0]) * 0.02

        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reset(self):
        """Reset the simulation and sample a new goal near the gripper."""
        self._destroy_viewer()
        self._reset_internal()
        self.sim.forward()

        # reset goal (marker); resample until the goal is comfortably outside
        # the success radius so the episode cannot start already solved
        gripper_site_pos = np.array(self.sim.data.site_xpos[self.eef_site_id])
        distance = np.random.uniform(self.lower_goal_range,
                                     self.upper_goal_range)
        while np.linalg.norm(distance) <= (self.distance_threshold * 1.75):
            distance = np.random.uniform(self.lower_goal_range,
                                         self.upper_goal_range)
        if self.seed:
            # fixed goal offset for reproducible runs
            distance = np.array((0.05, -0.1, 0.1))
        self.goal = gripper_site_pos + distance

        return self._get_observation()

    def _robot_jpos_getter(self):
        # nominal joint configuration handed to the IK controller
        return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161])

    def reward(self, action=None):
        """
        Reward function for the task.
        The dense reward has one component.
            Reaching: in [0, 1], to encourage the arm to reach the marker
        The sparse reward only consists of the reaching component.
        Args:
            action (np array): unused for this task
        Returns:
            reward (float): the reward
        """
        # velocity_pen = np.linalg(np.array(
        #     [self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes]
        # ))
        marker_pos = self.goal
        gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
        return self.compute_reward(gripper_site_pos, marker_pos, None)

    # for goalenv wrapper
    def compute_reward(self, achieved_goal, desired_goal, info=None):
        """Distance-based reward. Dense: 1 - tanh(10*d), bumped to 10.0 inside
        the success radius. Sparse: -1.0 outside the radius, 60.0 inside."""
        d = np.linalg.norm(achieved_goal - desired_goal)
        if self.reward_shaping:  # dense
            reward = 1 - np.tanh(10 * d)
            if d <= self.distance_threshold:
                reward = 10.0
        else:  # sparse (-1 or 0)
            # reward = -np.float32(d > distance_threshold)
            if d > self.distance_threshold:
                reward = -1.0
            else:
                reward = 60.0
        return reward

    # for goalenv wrapper
    # TODO check the desired_goal/achieved_goal for indexing
    def get_goalenv_dict(self, obs_dict):
        """Repackage a flat observation dict into the gym-GoalEnv layout."""
        # using only object-state and robot-state
        ob_lst = []
        di = {}
        for key in obs_dict:
            if key in ["robot-state", "object-state"]:
                ob_lst.append(obs_dict[key])
        di['observation'] = np.concatenate(ob_lst)
        # object-state is [gripper_to_marker, goal]; [0:3] below is therefore
        # the relative offset — see the TODO above about verifying indexing
        di['desired_goal'] = obs_dict['object-state'][0:3]
        # assumes robot-state[23:26] is the eef position — TODO confirm against
        # the SawyerEnv observation layout
        di['achieved_goal'] = obs_dict['robot-state'][23:26]
        return di

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].
        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_marker"] = gripper_site_pos - self.goal
            di["object-state"] = np.concatenate(
                [di["gripper_to_marker"], self.goal])

        return di

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to the goal
        if self.gripper_visualization:
            # FIX: this environment creates no "cube" object/site
            # (mujoco_objects is empty), so the previous site_name2id("cube")
            # lookup would fail; measure against the reach goal instead.
            # NOTE: squared distance, so max_dist is in squared-metre units.
            dist = np.sum(
                np.square(self.goal -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here: green when close, red when far
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5
            self.sim.model.site_rgba[self.eef_site_id] = rgba
class SawyerReach(SawyerEnv): """ This class corresponds to the reaching for the Sawyer robot arm. The gripper needs to arrive 2cm above a specified goal on the table. """ def __init__( self, gripper_type="PushingGripper", parameters_to_randomise=None, randomise_initial_conditions=True, table_full_size=(0.8, 1.6, 0.719), use_camera_obs=False, use_object_obs=True, reward_shaping=True, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=50, ignore_done=False, camera_name="frontview", camera_height=256, camera_width=256, camera_depth=False, pid=True, success_radius=0.01 ): """ Args: gripper_type (str): type of gripper, used to instantiate gripper models from gripper factory. parameters_to_randomise [string,] : List of keys for parameters to randomise, None means all the available parameters are randomised randomise_initial_conditions [bool,]: Whether or not to randomise the starting configuration of the task. table_full_size (3-tuple): x, y, and z dimensions of the table. use_camera_obs (bool): if True, every observation includes a rendered image. use_object_obs (bool): if True, include object (cube) information in the observation. reward_shaping (bool): if True, use dense rewards. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering. render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. 
ignore_done (bool): True if never terminating the environment (ignore @horizon). camera_name (str): name of camera to be rendered. Must be set if @use_camera_obs is True. camera_height (int): height of camera frame. camera_width (int): width of camera frame. camera_depth (bool): True if rendering RGB-D, and RGB otherwise. pid (bool) : Whether to use a velocity pid contoler or the mujoco proportional velocity contoler success_radius (bool) : how close to the goal si considered a success. """ self.initialised = False # settings for table self.table_full_size = table_full_size # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping if (self.reward_shaping): self.reward_range = [-np.inf, horizon * (0.1)] else: self.reward_range = [0, 1] # Whether to use dynamics domain randomisation self.parameters_to_randomise = parameters_to_randomise self.randomise_initial_conditions = randomise_initial_conditions self.dynamics_parameters = OrderedDict() self.default_dynamics_parameters = OrderedDict() self.parameter_sampling_ranges = OrderedDict() self.factors_for_param_randomisation = OrderedDict() self.success_radius = success_radius #Param for storing a specific goal starting position self.specific_goal_position = None super().__init__( gripper_type=gripper_type, gripper_visualization=False, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, use_camera_obs=use_camera_obs, camera_name=camera_name, camera_height=camera_height, camera_width=camera_width, camera_depth=camera_depth, pid=pid, ) self._set_default_dynamics_parameters(pid) self._set_default_parameter_sampling_ranges() self._set_dynamics_parameters(self.default_dynamics_parameters) 
self._set_factors_for_param_randomisation(self.default_dynamics_parameters) # Check that the parameters to randomise are within the allowed parameters if (self.parameters_to_randomise is not None): self._check_allowed_parameters(self.parameters_to_randomise) # IK solver for placing the arm at desired locations during reset self.IK_solver = SawyerEEFVelocityController() self.init_control_timestep = self.control_timestep self.init_qpos = self.mujoco_robot.init_qpos gripper_tip_xpos = self.sim.data.get_site_xpos("grip_site").copy() gripper_to_eef_in_world = self.sim.data.get_body_xpos("right_hand") - gripper_tip_xpos self.gripper_size = np.linalg.norm(gripper_to_eef_in_world) # Storing parameters for temporary switching self.cached_parameters_to_randomise = None self.cached_dynamics_parameters = None self.initialised = True self.reset() def _set_dynamics_parameters(self, parameters): self.dynamics_parameters = copy.deepcopy(parameters) def _default_damping_params(self): # return np.array([0.01566, 1.171, 0.4906, 0.1573, 1.293, 0.08688, 0.1942]) # -real world calibration # return np.array([0.8824,2.3357,1.1729, 0.0 , 0.5894, 0.0 ,0.0082]) #- command calibration return np.array([8.19520686e-01, 1.25425414e+00, 1.04222253e+00, 0.00000000e+00, 1.43146116e+00, 1.26807887e-01, 1.53680244e-01,]) #- command calibration 2 def _default_armature_params(self): return np.array([0.00000000e+00, 0.00000000e+00, 2.70022664e-02, 5.35581203e-02, 3.31204140e-01, 2.59623415e-01, 2.81964631e-01,]) def _default_joint_friction_params(self): return np.array([4.14390483e-03, 9.30938506e-02, 2.68656509e-02, 0.00000000e+00, 0.00000000e+00, 4.24867204e-04, 8.62040317e-04]) def _set_default_dynamics_parameters(self, use_pid): """ Setting the the default environment parameters. 
""" self.default_dynamics_parameters['joint_forces'] = np.zeros((7,)) self.default_dynamics_parameters['acceleration_forces'] = np.zeros((7,)) self.default_dynamics_parameters['eef_forces'] = np.zeros((6,)) self.default_dynamics_parameters['eef_timedelay'] = np.asarray(0) self.default_dynamics_parameters['timestep_parameter'] = np.asarray(0.0) self.default_dynamics_parameters['pid_iteration_time'] = np.asarray(0.) self.default_dynamics_parameters['mujoco_timestep'] = np.asarray(0.002) self.default_dynamics_parameters['action_additive_noise'] = np.asarray(0.0) self.default_dynamics_parameters['action_multiplicative_noise'] = np.asarray(0.0) self.default_dynamics_parameters['action_systematic_noise'] = np.asarray(0.0) self.default_dynamics_parameters['eef_obs_position_noise'] = np.asarray(0.0) self.default_dynamics_parameters['eef_obs_velocity_noise'] = np.asarray(0.0) link_masses = np.zeros((7,)) for link_name, idx, body_node, mass_node, joint_node in self._robot_link_nodes_generator(): if (mass_node is not None): dynamics_parameter_value = float(mass_node.get("mass")) link_masses[idx] = dynamics_parameter_value self.default_dynamics_parameters['link_masses'] = link_masses self.default_dynamics_parameters['joint_dampings'] = self._default_damping_params() self.default_dynamics_parameters['armatures'] = self._default_armature_params() self.default_dynamics_parameters['joint_frictions'] = self._default_joint_friction_params() if (use_pid): gains = self.mujoco_robot.velocity_pid_gains kps = np.array([gains['right_j{}'.format(actuator)]['p'] for actuator in range(7)]) kis = np.array([gains['right_j{}'.format(actuator)]['i'] for actuator in range(7)]) kds = np.array([gains['right_j{}'.format(actuator)]['d'] for actuator in range(7)]) # self.default_dynamics_parameters['kps'] = kps self.default_dynamics_parameters['kis'] = kis self.default_dynamics_parameters['kds'] = kds else: kvs = np.zeros((7,)) for target_joint, jnt_idx, node in 
self._velocity_actuator_nodes_generator(): gains_value = float(node.get("kv")) kvs[jnt_idx] = gains_value self.default_dynamics_parameters['kvs'] = kvs def _set_default_parameter_sampling_ranges(self): """ Returns the parameter ranges to draw samples from in the domain randomisation. """ parameter_ranges = { 'joint_forces': np.array([[0.,0.,0.,0.,0.,0.,0.],[1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5]]),# [2., 1.5, 1.5, 1.5 ,0.75 ,0.5,0.3] 'acceleration_forces': np.array([[0.,0.,0.,0.,0.,0.,0.], [0.12,0.12,0.12,0.12,0.12,0.12,0.12]]),# 'eef_forces': np.array([[0.,0.,0.,0.,0.,0.], [0.06 ,0.06,0.06,0.01,0.01,0.01,]]), # 'eef_timedelay': np.array([0, 1]), 'timestep_parameter': np.array([0.0, 0.01]), 'pid_iteration_time': np.array([0., 0.04]), #-1 and 0 are not allowed values 'mujoco_timestep': np.array([0.001,0.002]), 'action_additive_noise': np.array([0.01, 0.1]), 'action_multiplicative_noise': np.array([0.005,0.02]), 'action_systematic_noise': np.array([-0.05, 0.05]), 'eef_obs_position_noise': np.array([0.0005, 0.001]), 'eef_obs_velocity_noise': np.array([0.0005, 0.001]), 'link_masses': np.array([0.98, 1.02]), 'joint_dampings': np.array([0.5, 2.]), 'armatures': np.array([0.66, 1.5]), 'joint_frictions': np.array([0.66, 1.5]), } if (self.pid): parameter_ranges['kps'] = np.array([0.66, 1.5]) parameter_ranges['kis'] = np.array([0.66, 1.5]) parameter_ranges['kds'] = np.array([0.66, 1.5]) else: parameter_ranges['kvs'] = [0.5, 2] self.parameter_sampling_ranges = parameter_ranges def _set_factors_for_param_randomisation(self, parameters): factors = copy.deepcopy(parameters) factors['joint_forces'] = np.ones((7,)) factors['acceleration_forces'] = np.ones((7,)) factors['eef_forces'] = np.ones((1,)) factors['eef_timedelay'] = 1.0 factors['timestep_parameter'] = 1.0 factors['pid_iteration_time'] = 1.0 factors['mujoco_timestep'] = 1.0 factors['action_additive_noise'] = 1.0 factors['action_multiplicative_noise'] = 1.0 factors['action_systematic_noise'] = 1.0 
factors['eef_obs_position_noise'] = 1.0 factors['eef_obs_velocity_noise'] = 1.0 self.factors_for_param_randomisation = factors def _velocity_actuator_nodes_generator(self): """ Caching the xml nodes for the velocity actuators for use when setting the parameters """ for node in self.model.root.findall(".//velocity[@kv]"): target_joint = node.get("joint") jnt_idx = int(target_joint[-1]) yield target_joint, jnt_idx, node def _robot_link_nodes_generator(self): """ Caching the xml nodes for the velocity actuators for use when setting the parameters """ for link_idx, link_name in enumerate(self.mujoco_robot.links): body_node = self.mujoco_robot.root.find(".//body[@name='{}']".format(link_name)) mass_node = body_node.find("./inertial[@mass]") joint_node = body_node.find("./joint") yield link_name, link_idx, body_node, mass_node, joint_node def _check_allowed_parameters(self, parameters): allowed_parameters = self.get_parameter_keys() for param in parameters: assert param in allowed_parameters, '{} not allowed. Only allowed parameters are {}'.format(param, allowed_parameters) def _select_appropriate_distribution(self, key): ''' Which distribution to use to sample the different dynamics parameters. :param key: The parameter to consider. 
''' if ( key == 'joint_forces' or key == 'acceleration_forces' or key == 'eef_forces' or key == 'timestep_parameter' or key == 'pid_iteration_time' or key == 'mujoco_timestep' or key == 'action_additive_noise' or key == 'action_multiplicative_noise' or key == 'action_systematic_noise' or key == 'eef_obs_position_noise' or key == 'eef_obs_velocity_noise' or key == 'link_masses' ): return self.np_random.uniform elif ( key == 'eef_timedelay' ): return self._ranged_random_choice else: return self._loguniform def _loguniform(self, low=1e-10, high=1., size=None): return np.asarray(np.exp(self.np_random.uniform(np.log(low), np.log(high), size))) def _ranged_random_choice(self,low, high, size=1): vals = np.arange(low,high+1) return self.np_random.choice(vals, size) def _parameter_for_randomisation_generator(self, parameters=None): ''' Generates (key,value) pairs of sampled dynamics parameters. :param parameters: The parameters to be sampled for randomisation, if None, all the allowed parameters are sampled. ''' parameter_ranges = self.parameter_sampling_ranges if (parameters is None): parameters = self.get_parameter_keys() for key in parameters: parameter_range = parameter_ranges[key] if (parameter_range.shape[0] == 1): yield key, np.asarray(parameter_range[0]) elif (parameter_range.shape[0] == 2): distribution = self._select_appropriate_distribution(key) size = self.default_dynamics_parameters[key].shape yield key, np.asarray( self.factors_for_param_randomisation[key] * distribution(*parameter_ranges[key], size=size)) else: raise RuntimeError('Parameter radomisation range needs to be of shape {1,2}xN') def _load_model(self): """ Loads an xml model, puts it in self.model. This sets up the mujoco xml for the scene. 
""" super()._load_model() self.mujoco_robot.set_base_xpos([0, 0, 0]) ### Domain Randomisation ### if (self.initialised): for key, val in self._parameter_for_randomisation_generator(parameters=self.parameters_to_randomise): self.dynamics_parameters[key] = val ## Queues for adding time delays self.eef_pos_queue = deque(maxlen=int(self.dynamics_parameters['eef_timedelay'] + 1)) self.eef_vel_queue = deque(maxlen=int(self.dynamics_parameters['eef_timedelay'] + 1)) if (self.pid is not None): self.pid.sample_time = self.dynamics_parameters['pid_iteration_time'] ### Create the Task ### ## Load the Arena ## self.mujoco_arena = TableArena( table_full_size=self.table_full_size, table_friction=(0.1, 0.001, 0.001) ) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # The sawyer robot has a pedestal, we want to align it with the table self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0]) goal = BoxObject(size=[0.023 / 2, 0.023 / 2, 0.001 / 2], rgba=[0, 1, 0, 1], ) ## Put everything together into the task ## self.model = ReachTask(self.mujoco_arena, self.mujoco_robot, goal) ### Set the goal position ### # Gripper position at neutral gripper_pos_neutral = [0.44969246, 0.16029991, 1.00875409] if(self.specific_goal_position is not None): init_pos = np.array([gripper_pos_neutral[0] +self.specific_goal_position[0], gripper_pos_neutral[1] + self.specific_goal_position[1], self.model.table_top_offset[2]]) init_pos = array_to_string(init_pos) elif (self.randomise_initial_conditions): # Place goal in a 20x20cm square directly below the eef neutral position. 
noise = self.np_random.uniform(-1, 1, 3) * np.array([0.20, 0.20, 0.0]) offset = np.array([gripper_pos_neutral[0], gripper_pos_neutral[1], self.model.table_top_offset[2]]) init_pos = array_to_string(noise + offset) else: init_pos = np.concatenate([gripper_pos_neutral[:2], [self.model.table_top_offset[2]]]) init_pos = array_to_string(init_pos) self.model.xml_goal.set("pos", init_pos) ### Set the xml parameters to the values given by the dynamics_parameters attribute ### if (self.initialised): self._apply_xml_dynamics_parameters() def _apply_xml_dynamics_parameters(self): """ Applying the values contained in dynamics_parameters to the xml elements of the model. If a pid is used this also applied the pid gains contained in the dynamics parameters. """ opt_node = self.model.root.find('option') opt_node.set("timestep", str(self.dynamics_parameters['mujoco_timestep'])) for link_name, idx, body_node, mass_node, joint_node in self._robot_link_nodes_generator(): if (mass_node is not None): mass_node.set("mass", str(self.dynamics_parameters['link_masses'][idx])) if (joint_node is not None): joint_node.set("damping", str(self.dynamics_parameters['joint_dampings'][idx])) joint_node.set("armature", str(self.dynamics_parameters['armatures'][idx])) joint_node.set("frictionloss", str(self.dynamics_parameters['joint_frictions'][idx])) if (self.pid): self.pid.tunings = (self.dynamics_parameters['kps'], self.dynamics_parameters['kis'], self.dynamics_parameters['kds'], ) else: for target_joint, jnt_idx, node in self._velocity_actuator_nodes_generator(): node.set("kv", str(self.dynamics_parameters['kvs'][jnt_idx])) def set_parameter_sampling_ranges(self, sampling_ranges): ''' Set a new sampling range for the dynamics parameters. 
        :param sampling_ranges: (Dict) Dictionary of the sampling ranges for the different parameters of the form
            (param_name, range) where param_name is a valid param name string and range is a numpy array of
            dimensionality {1,2}xN where N is the dimension of the given parameter
        '''
        for candidate_name, candidate_value in sampling_ranges.items():
            # Only parameters already present in the table may be overridden.
            assert candidate_name in self.parameter_sampling_ranges, 'Valid parameters are {}'.format(self.parameter_sampling_ranges.keys())
            # 1 row or 2 rows allowed — presumably a point vs. a (low, high) bound pair; TODO confirm.
            assert candidate_value.shape[0] == 1 or candidate_value.shape[0]==2, 'First dimension of the sampling parameter needs to have value 1 or 2'
            assert len(candidate_value.shape) == len(self.parameter_sampling_ranges[candidate_name].shape), '{} has the wrong number of dimensions'.format(candidate_name)
            if(len(self.parameter_sampling_ranges[candidate_name].shape) >1):
                assert self.parameter_sampling_ranges[candidate_name].shape[1] == candidate_value.shape[1], '{} has the wrong shape'.format(candidate_name)

            self.parameter_sampling_ranges[candidate_name] = candidate_value

    def get_parameter_sampling_ranges(self):
        """Return a deep copy of the current sampling-range table."""
        return copy.deepcopy(self.parameter_sampling_ranges)

    def get_parameter_keys(self):
        """Return the names of all dynamics parameters (a view over the defaults dict)."""
        return self.default_dynamics_parameters.keys()

    def get_total_parameter_dimension(self):
        """Return the summed dimensionality (.size) of all dynamics parameters."""
        # NOTE(review): accumulator starts as a float, so the total is returned as a float.
        total_size = 0.
        for key, val in self.default_dynamics_parameters.items():
            total_size += val.size
        return total_size

    def get_internal_state(self):
        """Return joint positions and velocities concatenated, as a plain Python list."""
        return np.concatenate([self._joint_positions, self._joint_velocities]).tolist()

    def get_internal_state_dimension(self):
        """Return the length of the internal state vector."""
        internal_state = self.get_internal_state()
        return len(internal_state)

    def change_parameters_to_randomise(self, parameters):
        """Change which dynamics parameters are randomised, resetting all values to defaults first.

        :param parameters: ([string,]) names of the parameters to randomise from now on.
        """
        self._check_allowed_parameters(parameters)
        # Restore defaults so values randomised under the previous selection do not linger.
        self._set_dynamics_parameters(self.default_dynamics_parameters)
        self.parameters_to_randomise = parameters

    def get_randomised_parameters(self):
        """Return the parameter names currently randomised (all parameter keys if unset)."""
        if (self.parameters_to_randomise is not None):
            return self.parameters_to_randomise
        else:
            return self.get_parameter_keys()

    def get_randomised_parameter_dimensions(self):
        """ Return the number of dimensions of the randomised parameters"""
        randomised_parameter_names = self.get_randomised_parameters()
        total_dimension = 0
        for param in randomised_parameter_names:
            param_shape = self.default_dynamics_parameters[param].shape
            # Scalar parameters have shape () and count as one dimension.
            if(param_shape ==()):
                total_dimension += 1
            else:
                total_dimension += param_shape[0]
        return total_dimension

    def get_dynamics_parameters(self):
        """
        Returns the values of the current dynamics parameters.
        """
        return copy.deepcopy(self.dynamics_parameters)

    def get_default_dynamics_parameters(self):
        """
        Returns the default values of the dynamics parameters.
        """
        return copy.deepcopy(self.default_dynamics_parameters)

    def get_factors_for_randomisation(self):
        """
        Returns the factor used for domain randomisation.
        """
        return copy.deepcopy(self.factors_for_param_randomisation)

    def set_dynamics_parameters(self, dynamics_parameter_dict):
        """
        Setting the dynamics parameters of the environment to specific values. These are going to be used
        the next time the environment is reset, and will be overriden if domain randomisation is on.

        :param dynamics_parameter_dict: Dictionary with the values of the parameters to set.
        """
        for key, value in dynamics_parameter_dict.items():
            assert key in self.dynamics_parameters, 'Setting a parameter that does not exist'
            self.dynamics_parameters[key] = value

    def randomisation_off(self,):
        '''
        Disable the parameter randomisation temporarily and cache the current set of parameters and
        which parameters are being randomised. This can be useful for evaluation.
        '''
        current_params_to_randomise = self.get_randomised_parameters()
        current_params = self.get_dynamics_parameters()

        self.cached_parameters_to_randomise = current_params_to_randomise
        self.cached_dynamics_parameters = current_params

        # Empty list means "randomise nothing" until randomisation_on() restores the cache.
        self.parameters_to_randomise = []

        return current_params, current_params_to_randomise

    def randomisation_on(self):
        '''
        Restoring the randomisation as they were before the call to switch_params
        '''
        if(self.cached_dynamics_parameters is None):
            print("Randomisation was not switched off before switching it back on.")
            return

        self.parameters_to_randomise = self.cached_parameters_to_randomise
        self.set_dynamics_parameters(self.cached_dynamics_parameters)

        # Clear the cache so a repeated call is detected by the guard above.
        self.cached_parameters_to_randomise = None
        self.cached_dynamics_parameters = None

    def sample_parameter_randomisation(self, parameters=None):
        ''' Samples a dictionary of dynamics parameters values using the randomisation process
        currently set in the environment.

        parameters ([string,]) : List of parameters to sample a randomisation from. If None, all
            the allowed parameters are sampled.
        '''
        if (not self.initialised):
            print('Function has undefined behaviour if environment fully initialised, returning with no effect')
            return

        parameters_sample = {}

        for key, val in self._parameter_for_randomisation_generator(parameters):
            assert key in self.get_parameter_keys(), '{} not allowed. Choose from {}'.format(key, self.get_parameter_keys())
            parameters_sample[key] = val

        return parameters_sample

    def _set_goal_neutral_offset(self, goal_x, goal_y):
        """Fix the goal at an (x, y) offset relative to the neutral gripper position."""
        self.specific_goal_position = np.array([goal_x, goal_y])

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # goal ids
        self.goal_body_id = self.sim.model.body_name2id("goal")
        self.goal_site_id = self.sim.model.site_name2id("goal")

        # Gripper ids
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms
        ]

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        self.sim.forward()

        # Cache the neutral end-effector pose for later use.
        self.init_right_hand_quat = self._right_hand_quat
        self.init_right_hand_orn = self._right_hand_orn
        self.init_right_hand_pos = self._right_hand_pos

        # Rotation taking world-frame vectors into the eef frame (transpose of eef-in-world).
        eef_rot_in_world = self.sim.data.get_body_xmat("right_hand").reshape((3, 3))
        self.world_rot_in_eef = copy.deepcopy(eef_rot_in_world.T)

        if (self.randomise_initial_conditions and self.initialised):
            # Start the gripper in a 10x10 cm area around the neutral position
            eef_pos = self.sim.data.get_body_xpos('right_hand')
            noise = self.np_random.uniform(-1, 1, 3) * np.array([0.10, 0.10, 0.0])
            init_pos = eef_pos + noise
            # Fixed orientation matrix — presumably gripper pointing straight down; TODO confirm.
            init_pose = T.make_pose(init_pos, np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, -1.0]]))

            # Start the IK search from the rest qpos
            ref_q = self.mujoco_robot.init_qpos

            # Express init_pose in the base frame of the robot
            init_pose_in_base = self.pose_in_base(init_pose)

            # Do the IK
            joint_angles = self.IK_solver.compute_joint_angles_for_endpoint_pose(init_pose_in_base, ref_q)

            # Set the robot joint angles
            self.set_robot_joint_positions(joint_angles)
            self.sim.forward()

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [-inf, 0], to encourage the arm to reach the object
            Goal Distance: in [-inf, 0] the distance between the pushed object and the goal
            The sparse reward only receives a {0,1} upon reaching the goal

        Args:
            action (np array): The action taken in that timestep

        Returns:
            reward (float or dict): the reward if sparse rewards are used otherwise a dictionary
            with the total reward, and the subcomponents of the dense reward.
        """
        reward = 0.

        # sparse completion reward
        if not self.reward_shaping and self._check_success():
            reward = 1.0

        # use a dense reward
        if self.reward_shaping:
            # max joint angles reward
            joint_limits = self._joint_ranges
            current_joint_pos = self._joint_positions

            # -1 for every joint within 0.1 of either of its limits.
            hitting_limits_reward = - np.sum([(x < joint_limits[i, 0] + 0.1 or x > joint_limits[i, 1] - 0.1) for i, x in enumerate(current_joint_pos)])

            reward += hitting_limits_reward

            # reaching reward
            goal_pos = self.sim.data.site_xpos[self.goal_site_id]
            # Raise the target by 0.025 + gripper_size so it is expressed at the eef reference point.
            goal_pos_actual = goal_pos + np.array([0., 0., 0.025 + self.gripper_size])
            eef_pos = self.sim.data.get_body_xpos("right_hand")
            dist = np.linalg.norm(eef_pos - goal_pos_actual)
            reaching_reward = -dist
            reward += reaching_reward

            # Hitting the table reward
            hitting_the_table_reward = 0.0
            hitting_the_table = self._check_contact_with("table_collision")
            if (hitting_the_table):
                hitting_the_table_reward -= 1.0
            reward += hitting_the_table_reward

            # Success Reward
            success = self._check_success()
            if (success):
                reward += 0.1

            # Return all three types of rewards
            reward = {"reward": reward,
                      "reaching_distance": reaching_reward,
                      "hitting_limits_reward": hitting_limits_reward,
                      "hitting_the_table_reward": hitting_the_table_reward,
                      "unstable": False}

        return reward

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        eef_pos = self.sim.data.get_body_xpos("right_hand")
        goal_pos = self.sim.data.site_xpos[self.goal_site_id]
        # Same vertical offset as in the dense reward, so success matches the shaped target.
        goal_pos_actual = goal_pos + np.array([0., 0., 0.025 + self.gripper_size])

        dist = np.linalg.norm(goal_pos_actual - eef_pos)
        success_radius = self.success_radius

        # object centre is within the goal radius
        return dist < success_radius

    def _pre_action(self, action):
        """ Takes the action, randomised the control timestep, and adds some additional random noise to the action."""
        # Change control timestep to simulate various random time delays
        timestep_parameter = self.dynamics_parameters['timestep_parameter']
        self.control_timestep = self.init_control_timestep + self.np_random.exponential(scale=timestep_parameter)

        super()._pre_action(action)

        # Adding forces
        # Random perturbations on the joint dofs (7 values sampled).
        self.sim.data.qfrc_applied[
            self._ref_joint_vel_indexes
        ] += self.dynamics_parameters['joint_forces'] * self.np_random.uniform(-1, 1, 7)

        # Random 6-D wrench applied to the gripper body.
        self.sim.data.xfrc_applied[
            self._ref_gripper_body_indx
        ] = self.dynamics_parameters['eef_forces'] * self.np_random.uniform(-1, 1, 6)

        # Adding force proportional to acceleration
        self.sim.data.qfrc_applied[
            self._ref_joint_vel_indexes
        ] += self.dynamics_parameters['acceleration_forces'] * self.sim.data.qacc[
            self._ref_joint_vel_indexes
        ]

    def _post_action(self, action):
        """ Add dense reward subcomponents to info """
        reward, done, info = super()._post_action(action)

        if self.reward_shaping:
            # With shaping on, reward() returned a dict — unpack total and keep the rest as info.
            info = reward
            reward = reward["reward"]

        info["success"] = self._check_success()

        return reward, done, info

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = OrderedDict()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # Extract position and velocity of the eef
            eef_pos_in_world = self.sim.data.get_body_xpos("right_hand")
            eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand")

            # Apply time delays
            eef_pos_in_world = self._apply_time_delay(eef_pos_in_world, self.eef_pos_queue)
            eef_xvelp_in_world = self._apply_time_delay(eef_xvelp_in_world, self.eef_vel_queue)

            # Add random noise to the observations
            position_noise = self.dynamics_parameters['eef_obs_position_noise']
            velocity_noise = self.dynamics_parameters['eef_obs_velocity_noise']

            eef_pos_in_world = eef_pos_in_world + self.np_random.normal(loc=0., scale=position_noise)
            eef_xvelp_in_world = eef_xvelp_in_world + self.np_random.normal(loc=0., scale=velocity_noise)

            # Get the goal position in the world
            goal_pos_in_world = np.array(self.sim.data.site_xpos[self.goal_site_id])

            # Correct for the fact that in the real robot we record the eef position at the goal as the observation
            goal_pos_in_world = goal_pos_in_world + np.array([0., 0., self.gripper_size])

            # Get object to goal vectors in EEF frame
            eef_to_goal_in_world = goal_pos_in_world - eef_pos_in_world
            eef_to_goal_in_eef = self.world_rot_in_eef.dot(eef_to_goal_in_world)

            eef_xvelp_in_eef = self.world_rot_in_eef.dot(eef_xvelp_in_world)

            # Record observations into a dictionary
            di["eef_pos_in_world"] = eef_pos_in_world
            di["eef_vel_in_world"] = eef_xvelp_in_world
            di["goal_pos_in_world"] = goal_pos_in_world
            di["task-state"] = np.concatenate([eef_to_goal_in_eef, eef_xvelp_in_eef])

        return di

    def _apply_time_delay(self, object, queue):
        """Buffer the newest value and return the delayed one.

        Returns the oldest buffered value once the queue is full (a delay of
        maxlen - 1 reads); before that, returns the oldest value seen so far.
        """
        queue.appendleft(copy.deepcopy(object))

        if (len(queue) == queue.maxlen):
            return queue.pop()
        else:
            return queue[-1]

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            if (
                    self.sim.model.geom_id2name(contact.geom1) in self.gripper.contact_geoms()
                    or self.sim.model.geom_id2name(contact.geom2) in self.gripper.contact_geoms()
            ):
                collision = True
                break
        return collision

    def _check_contact_with(self, object):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[: self.sim.data.ncon]:
            # Contact counts only if one geom belongs to the gripper and the other is the named object.
            if (
                    (self.sim.model.geom_id2name(contact.geom1) in self.gripper.contact_geoms()
                     and contact.geom2 == self.sim.model.geom_name2id(object))

                    or (self.sim.model.geom_id2name(contact.geom2) in self.gripper.contact_geoms()
                        and contact.geom1 == self.sim.model.geom_name2id(object))
            ):
                collision = True
                break
        return collision

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to object
        if self.gripper_visualization:
            # get distance to object
            # NOTE(review): reads push_object_idx/object_names from the task model — looks
            # inherited from a pushing task; confirm these exist on this env's model.
            object_site_id = self.sim.model.site_name2id(self.model.object_names[self.model.push_object_idx])
            dist = np.sum(
                np.square(
                    self.sim.data.site_xpos[object_site_id]
                    - self.sim.data.get_site_xpos("grip_site")
                )
            )

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.)) ** 15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Adjust base pose(s) accordingly
        if self.env_configuration == "bimanual":
            # Single bimanual robot: place it at the standard table offset.
            xpos = self.robots[0].robot_model.base_xpos_offset["table"](
                self.table_full_size[0])
            self.robots[0].robot_model.set_base_xpos(xpos)
        else:
            if self.env_configuration == "single-arm-opposed":
                # Set up robots facing towards each other by rotating them from their default position
                for robot, rotation, offset in zip(self.robots, (np.pi / 2, -np.pi / 2), (-0.25, 0.25)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    rot = np.array((0, 0, rotation))
                    # Rotate the default base position about z, then shift along y.
                    xpos = T.euler2mat(rot) @ np.array(xpos)
                    xpos += np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)
                    robot.robot_model.set_base_ori(rot)
            else:  # "single-arm-parallel" configuration setting
                # Set up robots parallel to each other but offset from the center
                for robot, offset in zip(self.robots, (-0.6, 0.6)):
                    xpos = robot.robot_model.base_xpos_offset["table"](
                        self.table_full_size[0])
                    xpos = np.array(xpos) + np.array((0, offset, 0))
                    robot.robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        # NOTE(review): built with table_true_size, not table_full_size — the constructor
        # deliberately shrinks the table's y-extent; confirm this is intended here.
        mujoco_arena = TableArena(table_full_size=self.table_true_size,
                                  table_friction=self.table_friction,
                                  table_offset=self.table_offset)

        # Arena always gets set to zero origin
        mujoco_arena.set_origin([0, 0, 0])

        # Modify default agentview camera
        mujoco_arena.set_camera(camera_name="agentview",
                                pos=[
                                    0.8894354364730311, -3.481824231498976e-08,
                                    1.7383813133506494
                                ],
                                quat=[
                                    0.6530981063842773, 0.2710406184196472,
                                    0.27104079723358154, 0.6530979871749878
                                ])

        # initialize objects of interest
        self.hammer = HammerObject(name="hammer")

        # Create placement initializer
        if self.placement_initializer is not None:
            # Reuse the caller-supplied sampler; just reset it and register the hammer.
            self.placement_initializer.reset()
            self.placement_initializer.add_objects(self.hammer)
        else:
            # Set rotation about y-axis if hammer starts on table else rotate about z if it starts in gripper
            rotation_axis = 'y' if self.prehensile else 'z'
            self.placement_initializer = UniformRandomSampler(
                name="ObjectSampler",
                mujoco_objects=self.hammer,
                x_range=[-0.1, 0.1],
                y_range=[-0.05, 0.05],
                rotation=None,
                rotation_axis=rotation_axis,
                ensure_object_boundary_in_range=False,
                ensure_valid_placement=True,
                reference_pos=self.table_offset,
            )

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            mujoco_arena=mujoco_arena,
            mujoco_robots=[robot.robot_model for robot in self.robots],
            mujoco_objects=self.hammer,
        )
    def _load_model(self):
        """
        Loads an xml model, puts it in self.model. This sets up the mujoco xml for the scene.
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        ### Domain Randomisation ###
        if (self.initialised):
            # Draw a fresh value for every parameter currently selected for randomisation.
            for key, val in self._parameter_for_randomisation_generator(parameters=self.parameters_to_randomise):
                self.dynamics_parameters[key] = val

            ## Queues for adding time delays
            # maxlen = delay + 1, so a delay of 0 passes observations straight through.
            self.eef_pos_queue = deque(maxlen=int(self.dynamics_parameters['eef_timedelay'] + 1))
            self.eef_vel_queue = deque(maxlen=int(self.dynamics_parameters['eef_timedelay'] + 1))

            if (self.pid is not None):
                self.pid.sample_time = self.dynamics_parameters['pid_iteration_time']

        ### Create the Task ###

        ## Load the Arena ##
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=(0.1, 0.001, 0.001)
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The sawyer robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin([0.16 + self.table_full_size[0] / 2, 0, 0])

        # Thin green box used as a visual goal marker on the table surface.
        goal = BoxObject(size=[0.023 / 2, 0.023 / 2, 0.001 / 2],
                         rgba=[0, 1, 0, 1],
                         )

        ## Put everything together into the task ##
        self.model = ReachTask(self.mujoco_arena, self.mujoco_robot, goal)

        ### Set the goal position ###
        # Gripper position at neutral
        gripper_pos_neutral = [0.44969246, 0.16029991, 1.00875409]

        if(self.specific_goal_position is not None):
            # Fixed goal: neutral gripper xy plus the requested offset, at table height.
            init_pos = np.array([gripper_pos_neutral[0] +self.specific_goal_position[0],
                                 gripper_pos_neutral[1] + self.specific_goal_position[1],
                                 self.model.table_top_offset[2]])

            init_pos = array_to_string(init_pos)

        elif (self.randomise_initial_conditions):
            # Place goal in a 20x20cm square directly below the eef neutral position.
            noise = self.np_random.uniform(-1, 1, 3) * np.array([0.20, 0.20, 0.0])
            offset = np.array([gripper_pos_neutral[0], gripper_pos_neutral[1], self.model.table_top_offset[2]])
            init_pos = array_to_string(noise + offset)
        else:
            # Deterministic default: goal directly below the neutral gripper position.
            init_pos = np.concatenate([gripper_pos_neutral[:2], [self.model.table_top_offset[2]]])
            init_pos = array_to_string(init_pos)

        self.model.xml_goal.set("pos", init_pos)

        ### Set the xml parameters to the values given by the dynamics_parameters attribute ###
        if (self.initialised):
            self._apply_xml_dynamics_parameters()
class PandaLift(PandaEnv):
    """
    This class corresponds to the lifting task for the Panda robot arm.
    """

    def __init__(
        self,
        gripper_type="PandaGripper",
        table_full_size=(0.8, 0.8, 0.8),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:
            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                in every second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            # Default: small uniform jitter around the table centre with random z rotation.
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                z_rotation=True,
            )

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest; size is sampled between size_min and size_max
        cube = BoxObject(
            size_min=[0.020, 0.020, 0.020],  # [0.015, 0.015, 0.015],
            size_max=[0.022, 0.022, 0.022],  # [0.018, 0.018, 0.018])
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_objects = OrderedDict([("cube", cube)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x) for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions with small Gaussian noise around the rest pose.
        # BUGFIX: the original code did `init_pos = self.mujoco_robot.init_qpos` followed by
        # `init_pos += noise`, which mutates the robot model's init_qpos array in place, so
        # the nominal reset pose drifted with every reset. Out-of-place addition leaves the
        # stored rest pose untouched while producing the identically-distributed noisy pose.
        init_pos = self.mujoco_robot.init_qpos + \
            np.random.randn(self.mujoco_robot.init_qpos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [0, 1], to encourage the arm to reach the cube
            Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Args:
            action (np array): unused for this task

        Returns:
            reward (float): the reward
        """
        reward = 0.

        # sparse completion reward
        if self._check_success():
            reward = 1.0

        # use a shaping reward
        if self.reward_shaping:
            # reaching reward: 1 at the cube, decaying with distance via tanh
            cube_pos = self.sim.data.body_xpos[self.cube_body_id]
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - cube_pos)
            reaching_reward = 1 - np.tanh(10.0 * dist)
            reward += reaching_reward

            # grasping reward: both fingers must touch the cube (geom order in a
            # MuJoCo contact is arbitrary, hence the symmetric checks)
            touch_left_finger = False
            touch_right_finger = False
            for i in range(self.sim.data.ncon):
                c = self.sim.data.contact[i]
                if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_left_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.l_finger_geom_ids:
                    touch_left_finger = True
                if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cube_geom_id:
                    touch_right_finger = True
                if c.geom1 == self.cube_geom_id and c.geom2 in self.r_finger_geom_ids:
                    touch_right_finger = True
            if touch_left_finger and touch_right_finger:
                reward += 0.25

        return reward

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            robot-state: contains robot-centric information.
            object-state: requires @self.use_object_obs to be True.
                contains object-centric information.
            image: requires @self.use_camera_obs to be True.
                contains a rendered frame from the simulation.
            depth: requires @self.use_camera_obs and @self.camera_depth to be True.
                contains a rendered depth map from the simulation
        """
        di = super()._get_observation()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            # position and rotation of object (quaternion converted to xyzw order)
            cube_pos = np.array(self.sim.data.body_xpos[self.cube_body_id])
            cube_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cube_body_id]), to="xyzw")
            di["cube_pos"] = cube_pos
            di["cube_quat"] = cube_quat

            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.eef_site_id])
            di["gripper_to_cube"] = gripper_site_pos - cube_pos

            di["object-state"] = np.concatenate(
                [cube_pos, cube_quat, di["gripper_to_cube"]])

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        collision = False
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(
                    contact.geom1) in self.gripper.contact_geoms() or
                    self.sim.model.geom_id2name(
                        contact.geom2) in self.gripper.contact_geoms()):
                collision = True
                break
        return collision

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        cube_height = self.sim.data.body_xpos[self.cube_body_id][2]
        table_height = self.table_full_size[2]

        # cube is higher than the table top above a margin
        return cube_height > table_height + 0.04

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube (squared distance is fine for the colour ramp)
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.eef_site_id] = rgba
class TwoArmHandover(RobotEnv): """ This class corresponds to the handover task for two robot arms. Args: robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms) Note: Must be either 2 single single-arm robots or 1 bimanual robot! env_configuration (str): Specifies how to position the robots within the environment. Can be either: :`'bimanual'`: Only applicable for bimanual robot setups. Sets up the (single) bimanual robot on the -x side of the table :`'single-arm-parallel'`: Only applicable for multi single arm setups. Sets up the (two) single armed robots next to each other on the -x side of the table :`'single-arm-opposed'`: Only applicable for multi single arm setups. Sets up the (two) single armed robots opposed from each others on the opposite +/-y sides of the table (Default option) controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a custom controller. Else, uses the default controller for this specific task. Should either be single dict if same controller is to be used for all robots or else it should be a list of the same length as "robots" param gripper_types (str or list of str): type of gripper, used to instantiate gripper models from gripper factory. Default is "default", which is the default grippers(s) associated with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model overrides the default gripper. Should either be single str if same gripper type is to be used for all robots or else it should be a list of the same length as "robots" param gripper_visualizations (bool or list of bool): True if using gripper visualization. Useful for teleoperation. 
Should either be single bool if gripper visualization is to be used for all robots or else it should be a list of the same length as "robots" param initialization_noise (dict or list of dict): Dict containing the initialization noise parameters. The expected keys and corresponding value types are specified below: :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial joint positions. Setting this value to `None` or 0.0 results in no noise being applied. If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied, If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform" Should either be single dict if same noise value is to be used for all robots or else it should be a list of the same length as "robots" param :Note: Specifying "default" will automatically use the default noise settings. Specifying None will automatically create the required dict with "magnitude" set to 0.0. prehensile (bool): If true, handover object starts on the table. Else, the object starts in Arm0's gripper table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_camera_obs (bool): if True, every observation includes rendered image(s) use_object_obs (bool): if True, include object (cube) information in the observation. reward_scale (None or float): Scales the normalized reward function by the amount specified. If None, environment reward remains unnormalized reward_shaping (bool): if True, use dense rewards. placement_initializer (ObjectPositionSampler instance): if provided, will be used to place objects on every reset, else a UniformRandomSampler is used by default. use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging. 
has_renderer (bool): If true, render the simulation state in a viewer instead of headless mode. has_offscreen_renderer (bool): True if using off-screen rendering render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None' will result in the default angle being applied, which is useful as it can be dragged / panned by the user using the mouse render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise. render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise. control_freq (float): how many control signals to receive in every second. This sets the amount of simulation time that passes between every action input. horizon (int): Every episode lasts for exactly @horizon timesteps. ignore_done (bool): True if never terminating the environment (ignore @horizon). hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else, only calls sim.reset and resets all robosuite-internal variables camera_names (str or list of str): name of camera to be rendered. Should either be single str if same name is to be used for all cameras' rendering or else it should be a list of cameras to render. :Note: At least one camera must be specified if @use_camera_obs is True. :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each robot's camera list). camera_heights (int or list of int): height of camera frame. Should either be single int if same height is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. camera_widths (int or list of int): width of camera frame. Should either be single int if same width is to be used for all cameras' frames or else it should be a list of the same length as "camera names" param. 
camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single bool if same depth setting is to be used for all cameras or else it should be a list of the same length as "camera names" param. Raises: ValueError: [Invalid number of robots specified] ValueError: [Invalid env configuration] ValueError: [Invalid robots for specified env configuration] """ def __init__( self, robots, env_configuration="single-arm-opposed", controller_configs=None, gripper_types="default", gripper_visualizations=False, initialization_noise="default", prehensile=True, table_full_size=(0.8, 1.2, 0.05), table_friction=(1., 5e-3, 1e-4), use_camera_obs=True, use_object_obs=True, reward_scale=1.0, reward_shaping=False, placement_initializer=None, use_indicator_object=False, has_renderer=False, has_offscreen_renderer=True, render_camera="frontview", render_collision_mesh=False, render_visual_mesh=True, control_freq=10, horizon=1000, ignore_done=False, hard_reset=True, camera_names="agentview", camera_heights=256, camera_widths=256, camera_depths=False, ): # First, verify that correct number of robots are being inputted self.env_configuration = env_configuration self._check_robot_configuration(robots) # Task settings self.prehensile = prehensile # settings for table top self.table_full_size = table_full_size self.table_true_size = list(table_full_size) self.table_true_size[ 1] *= 0.25 # true size will only be partially wide self.table_friction = table_friction self.table_offset = [0, self.table_full_size[1] * (-3 / 8), 0.8] # reward configuration self.reward_scale = reward_scale self.reward_shaping = reward_shaping self.height_threshold = 0.1 # threshold above the table surface which the hammer is considered lifted # whether to use ground-truth object states self.use_object_obs = use_object_obs # object placement initializer if placement_initializer: self.placement_initializer = placement_initializer else: # Set rotation about y-axis if hammer starts on 
table else rotate about z if it starts in gripper rotation_axis = 'y' if self.prehensile else 'z' self.placement_initializer = UniformRandomSampler( x_range=[-0.1, 0.1], y_range=[-0.05, 0.05], ensure_object_boundary_in_range=False, rotation=None, rotation_axis=rotation_axis, ) super().__init__( robots=robots, controller_configs=controller_configs, gripper_types=gripper_types, gripper_visualizations=gripper_visualizations, initialization_noise=initialization_noise, use_camera_obs=use_camera_obs, use_indicator_object=use_indicator_object, has_renderer=has_renderer, has_offscreen_renderer=has_offscreen_renderer, render_camera=render_camera, render_collision_mesh=render_collision_mesh, render_visual_mesh=render_visual_mesh, control_freq=control_freq, horizon=horizon, ignore_done=ignore_done, hard_reset=hard_reset, camera_names=camera_names, camera_heights=camera_heights, camera_widths=camera_widths, camera_depths=camera_depths, ) def reward(self, action=None): """ Reward function for the task. Sparse un-normalized reward: - a discrete reward of 2.0 is provided when only Arm 1 is gripping the handle and has the handle lifted above a certain threshold Un-normalized max-wise components if using reward shaping: - Arm0 Reaching: (1) in [0, 0.25] proportional to the distance between Arm 0 and the handle - Arm0 Grasping: (2) in {0, 0.5}, nonzero if Arm 0 is gripping the hammer (any part). 
- Arm0 Lifting: (3) in {0, 1.0}, nonzero if Arm 0 lifts the handle from the table past a certain threshold - Arm0 Hovering: (4) in {0, [1.0, 1.25]}, nonzero only if Arm0 is actively lifting the hammer, and is proportional to the distance between the handle and Arm 1 conditioned on the handle being lifted from the table and being grasped by Arm 0 - Mutual Grasping: (5) in {0, 1.5}, nonzero if both Arm 0 and Arm 1 are gripping the hammer (Arm 1 must be gripping the handle) while lifted above the table - Handover: (6) in {0, 2.0}, nonzero when only Arm 1 is gripping the handle and has the handle lifted above the table Note that the final reward is normalized and scaled by reward_scale / 2.0 as well so that the max score is equal to reward_scale Args: action (np array): [NOT USED] Returns: float: reward value """ # Initialize reward reward = 0 # use a shaping reward if specified if self.reward_shaping: # Grab relevant parameters arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height = self._get_task_info( ) # First, we'll consider the cases if the hammer is lifted above the threshold (step 3 - 6) if hammer_height - table_height > self.height_threshold: # Split cases depending on whether arm1 is currently grasping the handle or not if arm1_grasp_handle: # Check if arm0 is grasping if arm0_grasp_any: # This is step 5 reward = 1.5 else: # This is step 6 (completed task!) 
reward = 2.0 # This is the case where only arm0 is grasping (step 2-3) else: reward = 1.0 # Add in up to 0.25 based on distance between handle and arm1 dist = np.linalg.norm(self._gripper_1_to_handle) reaching_reward = 0.25 * (1 - np.tanh(1.0 * dist)) reward += reaching_reward # Else, the hammer is still on the ground ): else: # Split cases depending on whether arm0 is currently grasping the handle or not if arm0_grasp_any: # This is step 2 reward = 0.5 else: # This is step 1, we want to encourage arm0 to reach for the handle dist = np.linalg.norm(self._gripper_0_to_handle) reaching_reward = 0.25 * (1 - np.tanh(1.0 * dist)) reward = reaching_reward # Else this is the sparse reward setting else: # Provide reward if only Arm 1 is grasping the hammer and the handle lifted above the pre-defined threshold if self._check_success(): reward = 2.0 if self.reward_scale is not None: reward *= self.reward_scale / 2.0 return reward def _load_model(self): """ Loads an xml model, puts it in self.model """ super()._load_model() # Adjust base pose(s) accordingly if self.env_configuration == "bimanual": xpos = self.robots[0].robot_model.base_xpos_offset["table"]( self.table_full_size[0]) self.robots[0].robot_model.set_base_xpos(xpos) else: if self.env_configuration == "single-arm-opposed": # Set up robots facing towards each other by rotating them from their default position for robot, rotation, offset in zip(self.robots, (np.pi / 2, -np.pi / 2), (-0.25, 0.25)): xpos = robot.robot_model.base_xpos_offset["table"]( self.table_full_size[0]) rot = np.array((0, 0, rotation)) xpos = T.euler2mat(rot) @ np.array(xpos) xpos += np.array((0, offset, 0)) robot.robot_model.set_base_xpos(xpos) robot.robot_model.set_base_ori(rot) else: # "single-arm-parallel" configuration setting # Set up robots parallel to each other but offset from the center for robot, offset in zip(self.robots, (-0.6, 0.6)): xpos = robot.robot_model.base_xpos_offset["table"]( self.table_full_size[0]) xpos = np.array(xpos) + 
np.array((0, offset, 0)) robot.robot_model.set_base_xpos(xpos) # load model for table top workspace self.mujoco_arena = TableArena(table_full_size=self.table_true_size, table_friction=self.table_friction, table_offset=self.table_offset) if self.use_indicator_object: self.mujoco_arena.add_pos_indicator() # Arena always gets set to zero origin self.mujoco_arena.set_origin([0, 0, 0]) # initialize objects of interest self.hammer = HammerObject(name="hammer") self.mujoco_objects = OrderedDict([("hammer", self.hammer)]) # task includes arena, robot, and objects of interest self.model = ManipulationTask( mujoco_arena=self.mujoco_arena, mujoco_robots=[robot.robot_model for robot in self.robots], mujoco_objects=self.mujoco_objects, visual_objects=None, initializer=self.placement_initializer, ) self.model.place_objects() def _get_reference(self): """ Sets up references to important components. A reference is typically an index or a list of indices that point to the corresponding elements in a flatten array, which is how MuJoCo stores physical simulation data. """ super()._get_reference() # Hammer object references from this env self.hammer_body_id = self.sim.model.body_name2id("hammer") self.hammer_handle_geom_id = self.sim.model.geom_name2id( "hammer_handle") self.hammer_head_geom_id = self.sim.model.geom_name2id("hammer_head") self.hammer_face_geom_id = self.sim.model.geom_name2id("hammer_face") self.hammer_claw_geom_id = self.sim.model.geom_name2id("hammer_claw") # General env references self.table_top_id = self.sim.model.site_name2id("table_top") def _reset_internal(self): """ Resets simulation internal configurations. 
""" super()._reset_internal() # Reset all object positions using initializer sampler if we're not directly loading from an xml if not self.deterministic_reset: # Sample from the placement initializer for all objects obj_pos, obj_quat = self.model.place_objects() # Loop through all objects and reset their positions for i, (obj_name, _) in enumerate(self.mujoco_objects.items()): # If prehensile, set the object normally if self.prehensile: self.sim.data.set_joint_qpos( obj_name + "_jnt0", np.concatenate( [np.array(obj_pos[i]), np.array(obj_quat[i])])) # Else, set the object in the hand of the robot and loop a few steps to guarantee the robot is grasping # the object initially else: eef_rot_quat = T.mat2quat( T.euler2mat( [np.pi - T.mat2euler(self._eef0_xmat)[2], 0, 0])) obj_quat[i] = T.quat_multiply(obj_quat[i], eef_rot_quat) for j in range(100): # Set object in hand self.sim.data.set_joint_qpos( obj_name + "_jnt0", np.concatenate( [self._eef0_xpos, np.array(obj_quat[i])])) # Close gripper (action = 1) and prevent arm from moving if self.env_configuration == 'bimanual': # Execute no-op action with gravity compensation torques = np.concatenate([ self.robots[0].controller["right"]. torque_compensation, self.robots[0]. 
controller["left"].torque_compensation ]) self.sim.data.ctrl[self.robots[ 0]._ref_joint_torq_actuator_indexes] = torques # Execute gripper action self.robots[0].grip_action([1], "right") else: # Execute no-op action with gravity compensation self.sim.data.ctrl[self.robots[0]._ref_joint_torq_actuator_indexes] =\ self.robots[0].controller.torque_compensation self.sim.data.ctrl[self.robots[1]._ref_joint_torq_actuator_indexes] = \ self.robots[1].controller.torque_compensation # Execute gripper action self.robots[0].grip_action([1]) # Take forward step self.sim.step() def _get_task_info(self): """ Helper function that grabs the current relevant locations of objects of interest within the environment Returns: 4-tuple: - (bool) True if Arm0 is grasping any part of the hammer - (bool) True if Arm1 is grasping the hammer handle - (float) Height of the hammer body - (float) Height of the table surface """ # Get height of hammer and table and define height threshold hammer_angle_offset = (self.hammer.handle_length / 2 + 2 * self.hammer.head_halfsize) * np.sin( self._hammer_angle) hammer_height = self.sim.data.geom_xpos[self.hammer_handle_geom_id][2]\ - self.hammer.get_top_offset()[2]\ - hammer_angle_offset table_height = self.sim.data.site_xpos[self.table_top_id][2] # Check if any Arm's gripper is grasping the hammer handle # Single bimanual robot setting if self.env_configuration == "bimanual": _contacts_0_lf = len( list( self.find_contacts( self.robots[0].gripper["left"]. important_geoms["left_finger"], self.hammer.all_geoms))) > 0 _contacts_0_rf = len( list( self.find_contacts( self.robots[0].gripper["left"]. important_geoms["right_finger"], self.hammer.all_geoms))) > 0 _contacts_1_lf = len( list( self.find_contacts( self.robots[0].gripper["right"]. important_geoms["left_finger"], self.hammer.handle_geoms))) > 0 _contacts_1_rf = len( list( self.find_contacts( self.robots[0].gripper["right"]. 
important_geoms["right_finger"], self.hammer.handle_geoms))) > 0 # Multi single arm setting else: _contacts_0_lf = len( list( self.find_contacts( self.robots[0].gripper.important_geoms["left_finger"], self.hammer.all_geoms))) > 0 _contacts_0_rf = len( list( self.find_contacts( self.robots[0].gripper.important_geoms["right_finger"], self.hammer.all_geoms))) > 0 _contacts_1_lf = len( list( self.find_contacts( self.robots[1].gripper.important_geoms["left_finger"], self.hammer.handle_geoms))) > 0 _contacts_1_rf = len( list( self.find_contacts( self.robots[1].gripper.important_geoms["right_finger"], self.hammer.handle_geoms))) > 0 arm0_grasp_any = True if _contacts_0_lf and _contacts_0_rf else False arm1_grasp_handle = True if _contacts_1_lf and _contacts_1_rf else False # Return all relevant values return arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height def _get_observation(self): """ Returns an OrderedDict containing observations [(name_string, np.array), ...]. Important keys: `'robot-state'`: contains robot-centric information. `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information. `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation. `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True. 
Contains a rendered depth map from the simulation Returns: OrderedDict: Observations from the environment """ di = super()._get_observation() # low-level object information if self.use_object_obs: # Get robot prefix if self.env_configuration == "bimanual": pr0 = self.robots[0].robot_model.naming_prefix + "right_" pr1 = self.robots[0].robot_model.naming_prefix + "left_" else: pr0 = self.robots[0].robot_model.naming_prefix pr1 = self.robots[1].robot_model.naming_prefix # position and rotation of hammer di["hammer_pos"] = np.array(self._hammer_pos) di["hammer_quat"] = np.array(self._hammer_quat) di["handle_xpos"] = np.array(self._handle_xpos) di[pr0 + "eef_xpos"] = np.array(self._eef0_xpos) di[pr1 + "eef_xpos"] = np.array(self._eef1_xpos) di[pr0 + "gripper_to_handle"] = np.array(self._gripper_0_to_handle) di[pr1 + "gripper_to_handle"] = np.array(self._gripper_1_to_handle) di["object-state"] = np.concatenate([ di["hammer_pos"], di["hammer_quat"], di["handle_xpos"], di[pr0 + "eef_xpos"], di[pr1 + "eef_xpos"], di[pr0 + "gripper_to_handle"], di[pr1 + "gripper_to_handle"], ]) return di def _check_success(self): """ Check if hammer is successfully handed off Returns: bool: True if handover has been completed """ # Grab relevant params arm0_grasp_any, arm1_grasp_handle, hammer_height, table_height = self._get_task_info( ) return \ True if \ arm1_grasp_handle and \ not arm0_grasp_any and \ hammer_height - table_height > self.height_threshold \ else False def _check_robot_configuration(self, robots): """ Sanity check to make sure the inputted robots and configuration is acceptable Args: robots (str or list of str): Robots to instantiate within this env """ robots = robots if type(robots) == list or type(robots) == tuple else [ robots ] if self.env_configuration == "single-arm-opposed" or self.env_configuration == "single-arm-parallel": # Specifically two robots should be inputted! 
is_bimanual = False if type(robots) is not list or len(robots) != 2: raise ValueError( "Error: Exactly two single-armed robots should be inputted " "for this task configuration!") elif self.env_configuration == "bimanual": is_bimanual = True # Specifically one robot should be inputted! if type(robots) is list and len(robots) != 1: raise ValueError( "Error: Exactly one bimanual robot should be inputted " "for this task configuration!") else: # This is an unknown env configuration, print error raise ValueError( "Error: Unknown environment configuration received. Only 'bimanual'," "'single-arm-parallel', and 'single-arm-opposed' are supported. Got: {}" .format(self.env_configuration)) # Lastly, check to make sure all inputted robot names are of their correct type (bimanual / not bimanual) for robot in robots: if check_type(robot, RobotType.bimanual) != is_bimanual: raise ValueError( "Error: For {} configuration, expected bimanual check to return {}; " "instead, got {}.".format( self.env_configuration, is_bimanual, check_type(robot, RobotType.bimanual))) @property def _handle_xpos(self): """ Grab the position of the hammer handle. Returns: np.array: (x,y,z) position of handle """ return self.sim.data.geom_xpos[self.hammer_handle_geom_id] @property def _head_xpos(self): """ Grab the position of the hammer head. Returns: np.array: (x,y,z) position of head """ return self.sim.data.geom_xpos[self.hammer_head_geom_id] @property def _face_xpos(self): """ Grab the position of the hammer face. Returns: np.array: (x,y,z) position of face """ return self.sim.data.geom_xpos[self.hammer_face_geom_id] @property def _claw_xpos(self): """ Grab the position of the hammer claw. Returns: np.array: (x,y,z) position of claw """ return self.sim.data.geom_xpos[self.hammer_claw_geom_id] @property def _hammer_pos(self): """ Grab the position of the hammer body. 
Returns: np.array: (x,y,z) position of body """ return np.array(self.sim.data.body_xpos[self.hammer_body_id]) @property def _hammer_quat(self): """ Grab the orientation of the hammer body. Returns: np.array: (x,y,z,w) quaternion of the hammer body """ return T.convert_quat(self.sim.data.body_xquat[self.hammer_body_id], to="xyzw") @property def _hammer_angle(self): """ Calculate the angle of hammer with the ground, relative to it resting horizontally Returns: float: angle in radians """ mat = T.quat2mat(self._hammer_quat) z_unit = [0, 0, 1] z_rotated = np.matmul(mat, z_unit) return np.pi / 2 - np.arccos(np.dot(z_unit, z_rotated)) @property def _world_quat(self): """ Grab the world orientation Returns: np.array: (x,y,z,w) world quaternion """ return T.convert_quat(np.array([1, 0, 0, 0]), to="xyzw") @property def _eef0_xpos(self): """ Grab the position of Robot 0's end effector. Returns: np.array: (x,y,z) position of EEF0 """ if self.env_configuration == "bimanual": return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id["right"]]) else: return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id]) @property def _eef1_xpos(self): """ Grab the position of Robot 1's end effector. Returns: np.array: (x,y,z) position of EEF1 """ if self.env_configuration == "bimanual": return np.array( self.sim.data.site_xpos[self.robots[0].eef_site_id["left"]]) else: return np.array( self.sim.data.site_xpos[self.robots[1].eef_site_id]) @property def _eef0_xmat(self): """ End Effector 0 orientation as a rotation matrix Note that this draws the orientation from the "ee" site, NOT the gripper site, since the gripper orientations are inconsistent! 
Returns: np.array: (3,3) orientation matrix for EEF0 """ pf = self.robots[0].robot_model.naming_prefix if self.env_configuration == "bimanual": return np.array(self.sim.data.site_xmat[ self.sim.model.site_name2id(pf + "right_ee")]).reshape(3, 3) else: return np.array(self.sim.data.site_xmat[ self.sim.model.site_name2id(pf + "ee")]).reshape(3, 3) @property def _eef1_xmat(self): """ End Effector 1 orientation as a rotation matrix Note that this draws the orientation from the "right_/left_hand" body, NOT the gripper site, since the gripper orientations are inconsistent! Returns: np.array: (3,3) orientation matrix for EEF1 """ if self.env_configuration == "bimanual": pf = self.robots[0].robot_model.naming_prefix return np.array(self.sim.data.site_xmat[ self.sim.model.site_name2id(pf + "left_ee")]).reshape(3, 3) else: pf = self.robots[1].robot_model.naming_prefix return np.array(self.sim.data.site_xmat[ self.sim.model.site_name2id(pf + "ee")]).reshape(3, 3) @property def _gripper_0_to_handle(self): """ Calculate vector from the left gripper to the hammer handle. Returns: np.array: (dx,dy,dz) distance vector between handle and EEF0 """ return self._handle_xpos - self._eef0_xpos @property def _gripper_1_to_handle(self): """ Calculate vector from the right gripper to the hammer handle. Returns: np.array: (dx,dy,dz) distance vector between handle and EEF1 """ return self._handle_xpos - self._eef1_xpos
class PandaPush(change_dof(PandaEnv, 7, 8)):  # don't need to control a gripper
    """
    This class corresponds to the pushing task for the Panda robot arm: push a
    cube across the table until it covers a (visual-only) goal cylinder.
    """

    # Ranges used for domain randomization of the table and the pushed box.
    # Keys ending in `_0/_1/_2` index the corresponding tuple component;
    # `boxobject_density_1000` is the density in units of 1000 kg/m^3.
    parameters_spec = {
        **PandaEnv.parameters_spec,
        'table_size_0': [0.7, 0.9],
        'table_size_1': [0.7, 0.9],
        'table_size_2': [0.7, 0.9],
        # 'table_friction_0': [0.4, 1.6],
        'table_friction_1': [0.0025, 0.0075],
        'table_friction_2': [0.00005, 0.00015],
        'boxobject_size_0': [0.018, 0.022],
        'boxobject_size_1': [0.018, 0.022],
        'boxobject_size_2': [0.018, 0.022],
        'boxobject_friction_0': [0.05, 0.15],
        # 'boxobject_friction_1': [0.0025, 0.0075],  # fixed this to zero
        'boxobject_friction_2': [0.00005, 0.00015],
        'boxobject_density_1000': [0.06, 0.14],
    }

    def reset_props(self,
                    table_size_0=0.8,
                    table_size_1=0.8,
                    table_size_2=0.8,
                    table_friction_0=0.,
                    table_friction_1=0.005,
                    table_friction_2=0.0001,
                    boxobject_size_0=0.020,
                    boxobject_size_1=0.020,
                    boxobject_size_2=0.020,
                    boxobject_friction_0=0.1,
                    boxobject_friction_1=0.0,
                    boxobject_friction_2=0.0001,
                    boxobject_density_1000=0.1,
                    **kwargs):
        """
        Store (possibly randomized) physical properties used at the next model
        load: table dimensions/friction and cube size/friction/density.
        Extra kwargs are forwarded to the superclass.
        """
        self.table_full_size = (table_size_0, table_size_1, table_size_2)
        self.table_friction = (table_friction_0, table_friction_1,
                               table_friction_2)
        self.boxobject_size = (boxobject_size_0, boxobject_size_1,
                               boxobject_size_2)
        self.boxobject_friction = (boxobject_friction_0, boxobject_friction_1,
                                   boxobject_friction_2)
        # density parameter is expressed in units of 1000 kg/m^3
        self.boxobject_density = boxobject_density_1000 * 1000.
        super().reset_props(**kwargs)

    def __init__(self,
                 use_object_obs=True,
                 reward_shaping=True,
                 placement_initializer=None,
                 object_obs_process=True,
                 **kwargs):
        """
        Args:
            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided,
                will be used to place objects on every reset, else a
                UniformRandomSampler is used by default.

            object_obs_process (bool): if True, process the object observation to
                get a task_state. Setting this to False is useful when some
                transformation (eg. noise) need to be done to object observation
                raw data prior to the processing.
        """
        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer: sample cube and goal in separate
        # per-object ranges unless the caller supplies their own sampler
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSamplerObjectSpecific(
                x_ranges=[[-0.03, -0.02], [0.09, 0.1]],
                y_ranges=[[-0.05, -0.04], [-0.05, -0.04]],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # for first initialization (overwritten by reset_props on later resets)
        self.table_full_size = (0.8, 0.8, 0.8)
        self.table_friction = (0., 0.005, 0.0001)
        self.boxobject_size = (0.02, 0.02, 0.02)
        self.boxobject_friction = (0.1, 0.005, 0.0001)
        self.boxobject_density = 100.

        self.object_obs_process = object_obs_process

        super().__init__(gripper_visualization=True, **kwargs)

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()
        self.mujoco_robot.set_base_xpos([0, 0, 0])

        # load model for table top workspace
        self.mujoco_arena = TableArena(table_full_size=self.table_full_size,
                                       table_friction=self.table_friction)
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # The panda robot has a pedestal, we want to align it with the table
        self.mujoco_arena.set_origin(
            [0.16 + self.table_full_size[0] / 2, 0, 0])

        # initialize objects of interest
        # in original robosuite, a simple domain randomization is included in
        # BoxObject implementation, and called here. We choose to discard that
        # implementation.
        cube = FullyFrictionalBoxObject(
            size=self.boxobject_size,
            friction=self.boxobject_friction,
            density=self.boxobject_density,
            rgba=[1, 0, 0, 1],
        )
        self.mujoco_cube = cube

        # flat green cylinder marking the goal location (registered as a visual
        # object below, so it does not collide)
        goal = CylinderObject(
            size=[0.03, 0.001],
            rgba=[0, 1, 0, 1],
        )
        self.mujoco_goal = goal

        self.mujoco_objects = OrderedDict([("cube", cube), ("goal", goal)])

        # task includes arena, robot, and objects of interest
        self.model = TableTopTask(
            self.mujoco_arena,
            self.mujoco_robot,
            self.mujoco_objects,
            initializer=self.placement_initializer,
            visual_objects=['goal'],
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements in a
        flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()
        self.cube_body_id = self.sim.model.body_name2id("cube")

        # gripper finger geom ids
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.left_finger_geoms
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.gripper.right_finger_geoms
        ]
        self.cube_geom_id = self.sim.model.geom_name2id("cube")

        # goal references
        self.goal_body_id = self.sim.model.body_name2id('goal')
        self.goal_site_id = self.sim.model.site_name2id('goal')

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()
        self.sim.forward()

        # reset positions of objects
        self.model.place_objects()

        # reset joint positions with small Gaussian noise
        # (fix: copy before adding noise — `init_pos += ...` mutated
        # self.mujoco_robot.init_qpos in place when it is an ndarray, so noise
        # accumulated across resets)
        init_pos = np.array(self.mujoco_robot.init_qpos)
        init_pos += np.random.randn(init_pos.shape[0]) * 0.02
        self.sim.data.qpos[self._ref_joint_pos_indexes] = np.array(init_pos)

        # shut the gripper
        self.sim.data.qpos[
            self._ref_joint_gripper_actuator_indexes] = np.array([0., -0.])

        # set other reference attributes
        eef_rot_in_world = self.sim.data.get_body_xmat("right_hand").reshape(
            (3, 3))
        # TODO inspect on this: should we set a golden reference other than a
        # initial position?
        self.world_rot_in_eef = copy.deepcopy(eef_rot_in_world.T)

    # reward function from sawyer_push
    def reward(self, action=None):
        """
        Reward function for the task.

        The dense reward has three components.

            Reaching: in [-inf, 0], to encourage the arm to reach the object
            Goal Distance: in [-inf, 0] the distance between the pushed object and the goal
            Safety reward in [-inf, 0], -1 for every joint that is at its limit.

        The sparse reward only receives a {0,1} upon reaching the goal

        Args:
            action (np array): The action taken in that timestep

        Returns:
            reward (float): the reward

        previously in robosuite-extra, when dense reward is used, the return
        value will be a dictionary. but we removed that feature.
        """
        reward = 0.

        # sparse completion reward
        if not self.reward_shaping and self._check_success():
            reward = 1.0

        # use a dense reward
        if self.reward_shaping:
            object_pos = self.sim.data.body_xpos[self.cube_body_id]

            # max joint angles reward: -1 if any joint is within 0.05 rad of a limit
            joint_limits = self._joint_ranges
            current_joint_pos = self._joint_positions
            hitting_limits_reward = -int(
                any([(x < joint_limits[i, 0] + 0.05 or
                      x > joint_limits[i, 1] - 0.05)
                     for i, x in enumerate(current_joint_pos)]))
            reward += hitting_limits_reward

            # reaching reward
            gripper_site_pos = self.sim.data.site_xpos[self.eef_site_id]
            dist = np.linalg.norm(gripper_site_pos - object_pos)
            reaching_reward = -0.15 * dist
            reward += reaching_reward

            # Success Reward
            success = self._check_success()
            if success:
                reward += 0.1

            # goal distance reward
            goal_pos = self.sim.data.site_xpos[self.goal_site_id]
            dist = np.linalg.norm(goal_pos - object_pos)
            goal_distance_reward = -dist
            reward += goal_distance_reward

            # punish when there is a line of object--gripper--goal
            angle_g_o_g = angle_between(gripper_site_pos - object_pos,
                                        goal_pos - object_pos)
            if not success and angle_g_o_g < np.pi / 2.:
                reward += -0.03 - 0.02 * (np.pi / 2. - angle_g_o_g)

            # NOTE: a previous version computed an `unstable = reward < -2.5`
            # flag and returned a dict of reward components; that dead code was
            # removed along with the feature.

        return reward

    def _check_success(self):
        """
        Returns True if task has been completed.
        """
        object_pos = self.sim.data.body_xpos[self.cube_body_id]
        goal_pos = self.sim.data.site_xpos[self.goal_site_id]
        dist = np.linalg.norm(goal_pos - object_pos)
        goal_horizontal_radius = self.model.mujoco_objects[
            'goal'].get_horizontal_radius()

        # object centre is within the goal radius
        return dist < goal_horizontal_radius

    def step(self, action):
        """
        explicitly shut the gripper by appending a constant close command (1.)
        to the 7-DoF arm action
        """
        joined_action = np.append(action, [1.])
        return super().step(joined_action)

    def world2eef(self, world):
        """Rotate a world-frame vector into the end-effector frame."""
        return self.world_rot_in_eef.dot(world)

    def put_raw_object_obs(self, di):
        """Record raw eef / cube / goal kinematics into observation dict @di."""
        # Extract position and velocity of the eef
        eef_pos_in_world = self.sim.data.get_body_xpos("right_hand")
        eef_xvelp_in_world = self.sim.data.get_body_xvelp("right_hand")

        # Get the position and velocity of the object in the world frame
        object_pos_in_world = self.sim.data.body_xpos[self.cube_body_id]
        object_xvelp_in_world = self.sim.data.get_body_xvelp('cube')

        # Orientation is exposed as an (x,y,z,w) quaternion; the previous
        # sin/cos-encoded z-angle feature (and the unused rotation-matrix
        # lookup that fed it) was removed.
        object_quat = convert_quat(
            self.sim.data.body_xquat[self.cube_body_id], to='xyzw')

        # Get the goal position in the world
        goal_site_pos_in_world = np.array(
            self.sim.data.site_xpos[self.goal_site_id])

        # Record observations into a dictionary
        di['goal_pos_in_world'] = goal_site_pos_in_world
        di['eef_pos_in_world'] = eef_pos_in_world
        di['eef_vel_in_world'] = eef_xvelp_in_world
        di['object_pos_in_world'] = object_pos_in_world
        di['object_vel_in_world'] = object_xvelp_in_world
        di['object_quat'] = object_quat

    def process_object_obs(self, di):
        """Condense the raw observations in @di into a single 'task_state' vector."""
        eef_to_object_in_world = di['object_pos_in_world'] - di[
            'eef_pos_in_world']
        object_to_goal_in_world = di['goal_pos_in_world'] - di[
            'object_pos_in_world']

        task_state = np.concatenate([
            eef_to_object_in_world, object_to_goal_in_world,
            di['eef_vel_in_world'], di['object_vel_in_world'],
            di['object_quat']
        ])
        di['task_state'] = task_state

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:
            gripper_to_object : The x-y component of the gripper to object distance
            object_to_goal : The x-y component of the object-to-goal distance
            object_z_rot : the rotation of the object around an axis sticking out the table

            object_xvelp: x-y linear velocity of the object
            gripper_xvelp: x-y linear velocity of the gripper

            task-state : a concatenation of all the above.
        """
        # NOTE: the superclass observation (joint angles & velocities) is
        # intentionally skipped here.
        di = OrderedDict()

        # camera observations
        if self.use_camera_obs:
            camera_obs = self.sim.render(
                camera_name=self.camera_name,
                width=self.camera_width,
                height=self.camera_height,
                depth=self.camera_depth,
            )
            if self.camera_depth:
                di["image"], di["depth"] = camera_obs
            else:
                di["image"] = camera_obs

        # low-level object information
        if self.use_object_obs:
            self.put_raw_object_obs(di)
            if self.object_obs_process:
                self.process_object_obs(di)

        return di

    def _check_contact(self):
        """
        Returns True if gripper is in contact with an object.
        """
        # hoist the gripper geom collection out of the loop (it was re-computed
        # twice per contact)
        gripper_geoms = self.gripper.contact_geoms()
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if (self.sim.model.geom_id2name(contact.geom1) in gripper_geoms
                    or self.sim.model.geom_id2name(
                        contact.geom2) in gripper_geoms):
                return True
        return False

    def _check_contact_with(self, object):
        """
        Returns True if gripper is in contact with @object (a geom name).

        Note: the parameter name shadows the `object` builtin; it is kept for
        backward compatibility with keyword callers.
        """
        # hoist loop-invariant lookups out of the contact loop
        gripper_geoms = self.gripper.contact_geoms()
        object_geom_id = self.sim.model.geom_name2id(object)
        for contact in self.sim.data.contact[:self.sim.data.ncon]:
            if ((self.sim.model.geom_id2name(contact.geom1) in gripper_geoms
                 and contact.geom2 == object_geom_id)
                    or (self.sim.model.geom_id2name(
                        contact.geom2) in gripper_geoms
                        and contact.geom1 == object_geom_id)):
                return True
        return False

    def _gripper_visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to cube
        if self.gripper_visualization:
            # get distance to cube
            cube_site_id = self.sim.model.site_name2id("cube")
            dist = np.sum(
                np.square(self.sim.data.site_xpos[cube_site_id] -
                          self.sim.data.get_site_xpos("grip_site")))

            # set RGBA for the EEF site here: green when close, red when far
            max_dist = 0.1
            scaled = (1.0 - min(dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5
            self.sim.model.site_rgba[self.eef_site_id] = rgba
class Stack(RobotEnv):
    """
    This class corresponds to the stacking task for a single robot arm.

    Args:
        robots (str or list of str): Specification for specific robot arm(s) to be instantiated within this env
            (e.g: "Sawyer" would generate one arm; ["Panda", "Panda", "Sawyer"] would generate three robot arms)
            Note: Must be a single single-arm robot!

        controller_configs (str or list of dict): If set, contains relevant controller parameters for creating a
            custom controller. Else, uses the default controller for this specific task. Should either be single
            dict if same controller is to be used for all robots or else it should be a list of the same length as
            "robots" param

        gripper_types (str or list of str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default grippers(s) associated
            with the robot(s) the 'robots' specification. None removes the gripper, and any other (valid) model
            overrides the default gripper. Should either be single str if same gripper type is to be used for all
            robots or else it should be a list of the same length as "robots" param

        gripper_visualizations (bool or list of bool): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for all
            robots or else it should be a list of the same length as "robots" param

        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
            The expected keys and corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to `None` or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            Should either be single dict if same noise value is to be used for all robots or else it should be a
            list of the same length as "robots" param

            :Note: Specifying "default" will automatically use the default noise settings.
                Specifying None will automatically create the required dict with "magnitude" set to 0.0.

        table_full_size (3-tuple): x, y, and z dimensions of the table.

        table_friction (3-tuple): the three mujoco friction parameters for the table.

        use_camera_obs (bool): if True, every observation includes rendered image(s)

        use_object_obs (bool): if True, include object (cube) information in the observation.

        reward_scale (None or float): Scales the normalized reward function by the amount specified.
            If None, environment reward remains unnormalized

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided, will
            be used to place objects on every reset, else a UniformRandomSampler
            is used by default.

        use_indicator_object (bool): if True, sets up an indicator object that is useful for debugging.

        has_renderer (bool): If true, render the simulation state in
            a viewer instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering

        render_camera (str): Name of camera to render if `has_renderer` is True. Setting this value to 'None'
            will result in the default angle being applied, which is useful as it can be dragged / panned by
            the user using the mouse

        render_collision_mesh (bool): True if rendering collision meshes in camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera. False otherwise.

        control_freq (float): how many control signals to receive in every second. This sets the amount of
            simulation time that passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon a reset call, else,
            only calls sim.reset and resets all robosuite-internal variables

        camera_names (str or list of str): name of camera to be rendered. Should either be single str if
            same name is to be used for all cameras' rendering or else it should be a list of cameras to render.

            :Note: At least one camera must be specified if @use_camera_obs is True.

            :Note: To render all robots' cameras of a certain type (e.g.: "robotview" or "eye_in_hand"), use the
                convention "all-{name}" (e.g.: "all-robotview") to automatically render all camera images from each
                robot's camera list).

        camera_heights (int or list of int): height of camera frame. Should either be single int if
            same height is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_widths (int or list of int): width of camera frame. Should either be single int if
            same width is to be used for all cameras' frames or else it should be a list of the same length as
            "camera names" param.

        camera_depths (bool or list of bool): True if rendering RGB-D, and RGB otherwise. Should either be single
            bool if same depth setting is to be used for all cameras or else it should be a list of the same length as
            "camera names" param.

    Raises:
        AssertionError: [Invalid number of robots specified]
    """

    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        """
        Initialize the stacking environment.

        See the class docstring for a description of every argument.
        """
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer; falls back to a uniform sampler over a
        # small square region centered on the table
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.08, 0.08],
                y_range=[-0.08, 0.08],
                ensure_object_boundary_in_range=False,
                rotation=None,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )

    def reward(self, action):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 2.0 is provided if the red block is stacked on the green block

        Un-normalized components if using reward shaping:

            - Reaching: in [0, 0.25], to encourage the arm to reach the cube
            - Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            - Lifting: in {0, 1}, non-zero if arm has lifted the cube
            - Aligning: in [0, 0.5], encourages aligning one cube over the other
            - Stacking: in {0, 2}, non-zero if cube is stacked on other cube

        The reward is max over the following:

            - Reaching + Grasping
            - Lifting + Aligning
            - Stacking

        The sparse reward only consists of the stacking component.

        Note that the final reward is normalized and scaled by
        reward_scale / 2.0 as well so that the max score is equal to reward_scale

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        r_reach, r_lift, r_stack = self.staged_rewards()

        if self.reward_shaping:
            # Dense: best of the staged components.
            reward = max(r_reach, r_lift, r_stack)
        else:
            # Sparse: only the stacking success matters.
            reward = 2.0 if r_stack > 0 else 0.0

        # Normalize so the maximum achievable reward equals reward_scale.
        if self.reward_scale is not None:
            reward *= self.reward_scale / 2.0

        return reward

    def staged_rewards(self):
        """
        Helper function to calculate staged rewards based on current physical states.

        Returns:
            3-tuple:

                - (float): reward for reaching and grasping
                - (float): reward for lifting and aligning
                - (float): reward for stacking
        """
        # reaching is successful when the gripper site is close to
        # the center of the cube
        cubeA_pos = self.sim.data.body_xpos[self.cubeA_body_id]
        cubeB_pos = self.sim.data.body_xpos[self.cubeB_body_id]
        gripper_site_pos = self.sim.data.site_xpos[self.robots[0].eef_site_id]
        dist = np.linalg.norm(gripper_site_pos - cubeA_pos)
        r_reach = (1 - np.tanh(10.0 * dist)) * 0.25

        # collision checking: scan active contacts for finger/cubeA and
        # cubeA/cubeB pairs (geoms may appear in either order)
        touch_left_finger = False
        touch_right_finger = False
        touch_cubeA_cubeB = False

        for i in range(self.sim.data.ncon):
            c = self.sim.data.contact[i]
            if c.geom1 in self.l_finger_geom_ids and c.geom2 == self.cubeA_geom_id:
                touch_left_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 in self.l_finger_geom_ids:
                touch_left_finger = True
            if c.geom1 in self.r_finger_geom_ids and c.geom2 == self.cubeA_geom_id:
                touch_right_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 in self.r_finger_geom_ids:
                touch_right_finger = True
            if c.geom1 == self.cubeA_geom_id and c.geom2 == self.cubeB_geom_id:
                touch_cubeA_cubeB = True
            if c.geom1 == self.cubeB_geom_id and c.geom2 == self.cubeA_geom_id:
                touch_cubeA_cubeB = True

        # additional grasping reward: both fingers touching cubeA counts as a grasp
        if touch_left_finger and touch_right_finger:
            r_reach += 0.25

        # lifting is successful when the cube is above the table top
        # by a margin
        cubeA_height = cubeA_pos[2]
        table_height = self.mujoco_arena.table_offset[2]
        cubeA_lifted = cubeA_height > table_height + 0.04
        r_lift = 1.0 if cubeA_lifted else 0.0

        # Aligning is successful when cubeA is right above cubeB
        if cubeA_lifted:
            horiz_dist = np.linalg.norm(
                np.array(cubeA_pos[:2]) - np.array(cubeB_pos[:2]))
            r_lift += 0.5 * (1 - np.tanh(horiz_dist))

        # stacking is successful when the block is lifted and
        # the gripper is not holding the object
        r_stack = 0
        not_touching = not touch_left_finger and not touch_right_finger
        if not_touching and r_lift > 0 and touch_cubeA_cubeB:
            r_stack = 2.0

        return r_reach, r_lift, r_stack

    def _load_model(self):
        """
        Loads an xml model, puts it in self.model
        """
        super()._load_model()

        # Verify the correct robot has been loaded
        assert isinstance(self.robots[0], SingleArm), \
            "Error: Expected one single-armed robot! Got {} type instead.".format(type(self.robots[0]))

        # Adjust base pose accordingly: position the robot base relative to
        # the table's x extent
        xpos = self.robots[0].robot_model.base_xpos_offset["table"](
            self.table_full_size[0])
        self.robots[0].robot_model.set_base_xpos(xpos)

        # load model for table top workspace
        self.mujoco_arena = TableArena(
            table_full_size=self.table_full_size,
            table_friction=self.table_friction,
            table_offset=(0, 0, 0.8),
        )
        if self.use_indicator_object:
            self.mujoco_arena.add_pos_indicator()

        # Arena always gets set to zero origin
        self.mujoco_arena.set_origin([0, 0, 0])

        # initialize objects of interest: two textured cubes, a smaller red
        # one (cubeA, to be stacked) and a larger green one (cubeB, the base)
        tex_attrib = {
            "type": "cube",
        }
        mat_attrib = {
            "texrepeat": "1 1",
            "specular": "0.4",
            "shininess": "0.1",
        }
        redwood = CustomMaterial(
            texture="WoodRed",
            tex_name="redwood",
            mat_name="redwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        greenwood = CustomMaterial(
            texture="WoodGreen",
            tex_name="greenwood",
            mat_name="greenwood_mat",
            tex_attrib=tex_attrib,
            mat_attrib=mat_attrib,
        )
        # size_min == size_max disables the size randomization BoxObject offers
        cubeA = BoxObject(
            name="cubeA",
            size_min=[0.02, 0.02, 0.02],
            size_max=[0.02, 0.02, 0.02],
            rgba=[1, 0, 0, 1],
            material=redwood,
        )
        cubeB = BoxObject(
            name="cubeB",
            size_min=[0.025, 0.025, 0.025],
            size_max=[0.025, 0.025, 0.025],
            rgba=[0, 1, 0, 1],
            material=greenwood,
        )
        self.mujoco_objects = OrderedDict([("cubeA", cubeA), ("cubeB", cubeB)])

        # task includes arena, robot, and objects of interest
        self.model = ManipulationTask(
            self.mujoco_arena,
            [robot.robot_model for robot in self.robots],
            self.mujoco_objects,
            initializer=self.placement_initializer,
        )
        self.model.place_objects()

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements
        in a flatten array, which is how MuJoCo stores physical simulation data.
        """
        super()._get_reference()

        # Additional object references from this env
        self.cubeA_body_id = self.sim.model.body_name2id("cubeA")
        self.cubeB_body_id = self.sim.model.body_name2id("cubeB")

        # information of objects
        self.object_names = list(self.mujoco_objects.keys())
        self.object_site_ids = [
            self.sim.model.site_name2id(ob_name)
            for ob_name in self.object_names
        ]

        # id of grippers for contact checking
        self.l_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["left_finger"]
        ]
        self.r_finger_geom_ids = [
            self.sim.model.geom_name2id(x)
            for x in self.robots[0].gripper.important_geoms["right_finger"]
        ]
        self.cubeA_geom_id = self.sim.model.geom_name2id("cubeA")
        self.cubeB_geom_id = self.sim.model.geom_name2id("cubeB")

    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            obj_pos, obj_quat = self.model.place_objects()

            # Loop through all objects and reset their positions
            # NOTE(review): the "_jnt0" suffix presumably matches the free
            # joint name generated for each object -- verify against the
            # object XML if joint naming changes
            for i, (obj_name, _) in enumerate(self.mujoco_objects.items()):
                self.sim.data.set_joint_qpos(
                    obj_name + "_jnt0",
                    np.concatenate(
                        [np.array(obj_pos[i]),
                         np.array(obj_quat[i])]))

    def _get_observation(self):
        """
        Returns an OrderedDict containing observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

            `'object-state'`: requires @self.use_object_obs to be True. Contains object-centric information.

            `'image'`: requires @self.use_camera_obs to be True. Contains a rendered frame from the simulation.

            `'depth'`: requires @self.use_camera_obs and @self.camera_depth to be True.
            Contains a rendered depth map from the simulation

        Returns:
            OrderedDict: Observations from the environment
        """
        di = super()._get_observation()

        # low-level object information
        if self.use_object_obs:
            # Get robot prefix (used to namespace per-robot keys)
            pr = self.robots[0].robot_model.naming_prefix

            # position and rotation of the first cube (quaternion converted
            # from MuJoCo's wxyz ordering to xyzw)
            cubeA_pos = np.array(self.sim.data.body_xpos[self.cubeA_body_id])
            cubeA_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cubeA_body_id]),
                                      to="xyzw")
            di["cubeA_pos"] = cubeA_pos
            di["cubeA_quat"] = cubeA_quat

            # position and rotation of the second cube
            cubeB_pos = np.array(self.sim.data.body_xpos[self.cubeB_body_id])
            cubeB_quat = convert_quat(np.array(
                self.sim.data.body_xquat[self.cubeB_body_id]),
                                      to="xyzw")
            di["cubeB_pos"] = cubeB_pos
            di["cubeB_quat"] = cubeB_quat

            # relative positions between gripper and cubes
            gripper_site_pos = np.array(
                self.sim.data.site_xpos[self.robots[0].eef_site_id])
            di[pr + "gripper_to_cubeA"] = gripper_site_pos - cubeA_pos
            di[pr + "gripper_to_cubeB"] = gripper_site_pos - cubeB_pos
            di["cubeA_to_cubeB"] = cubeA_pos - cubeB_pos

            # Flat vector of all object-centric quantities for learning.
            di["object-state"] = np.concatenate([
                cubeA_pos,
                cubeA_quat,
                cubeB_pos,
                cubeB_quat,
                di[pr + "gripper_to_cubeA"],
                di[pr + "gripper_to_cubeB"],
                di["cubeA_to_cubeB"],
            ])

        return di

    def _check_success(self):
        """
        Check if blocks are stacked correctly.

        Returns:
            bool: True if blocks are correctly stacked
        """
        _, _, r_stack = self.staged_rewards()
        return r_stack > 0

    def _visualization(self):
        """
        Do any needed visualization here. Overrides superclass implementations.
        """
        # color the gripper site appropriately based on distance to cube
        if self.robots[0].gripper_visualization:
            # find closest object: squared distance from the grip site to
            # every site, then keep only the object sites we care about
            square_dist = lambda x: np.sum(
                np.square(x - self.sim.data.get_site_xpos(self.robots[
                    0].gripper.visualization_sites["grip_site"])))
            dists = np.array(list(map(square_dist, self.sim.data.site_xpos)))
            # make sure we don't pick the same site
            dists[self.robots[0].eef_site_id] = np.inf
            dists[self.robots[0].eef_cylinder_id] = np.inf
            # filter out object sites we care about
            ob_dists = dists[self.object_site_ids]
            min_dist = np.min(ob_dists)

            # set RGBA for the EEF site here: red when far, green when near
            max_dist = 0.1
            scaled = (1.0 - min(min_dist / max_dist, 1.))**15
            rgba = np.zeros(4)
            rgba[0] = 1 - scaled
            rgba[1] = scaled
            rgba[3] = 0.5

            self.sim.model.site_rgba[self.robots[0].eef_site_id] = rgba

    def _check_robot_configuration(self, robots):
        """
        Sanity check to make sure the inputted robots and configuration is acceptable

        Args:
            robots (str or list of str): Robots to instantiate within this env
        """
        # A bare str specifies a single robot and is always acceptable; only a
        # list needs its length validated.
        if type(robots) is list:
            assert len(
                robots
            ) == 1, "Error: Only one robot should be inputted for this task!"