def move_orient(self, env, q1):
    q0 = env.observation_spec()['eef_quat']
    x_target = env.observation_spec()['eef_pos']
    fraction_size = 50
    for i in range(fraction_size):
        q_target = T.quat_slerp(q0, q1, fraction=(i + 1) / fraction_size)
        steps = 0
        lamda = 0.01
        current = env._right_hand_orn
        target_rotation = T.quat2mat(q_target)
        drotation = current.T.dot(target_rotation)
        while np.linalg.norm(T.mat2euler(drotation)) > 0.01:
            current = env._right_hand_orn
            target_rotation = T.quat2mat(q_target)
            drotation = current.T.dot(target_rotation)
            dquat = T.mat2quat(drotation)
            x_current = env.observation_spec()['eef_pos']
            d_pos = np.clip((x_target - x_current) * lamda, -0.05, 0.05)
            d_pos = [0, 0, 0]  # overrides the clipped correction: orientation-only action
            action = np.concatenate((d_pos, dquat, [self.grasp]))
            env.step(action)
            env.render()
            steps += 1
            if steps > 20:
                break
    return
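# A minimal sketch of the slerp-based orientation stepping above, run on
# standalone quaternions instead of an env, assuming robosuite's
# transform_utils is importable; the start/goal quaternions are made up.
import numpy as np
import robosuite.utils.transform_utils as T

q0 = np.array([0.0, 0.0, 0.0, 1.0])              # start orientation (xyzw)
q1 = np.array([0.0, 0.0, 0.7071068, 0.7071068])  # goal: 90 deg about z

fraction_size = 5
current = T.quat2mat(q0)
for i in range(fraction_size):
    q_target = T.quat_slerp(q0, q1, fraction=(i + 1) / fraction_size)
    # remaining rotation from the current frame to the interpolated target
    drotation = current.T.dot(T.quat2mat(q_target))
    print(i, np.linalg.norm(T.mat2euler(drotation)))
    current = T.quat2mat(q_target)  # pretend the arm tracked the target exactly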
def run_controller(self):
    """
    Calculates the torques required to reach the desired setpoint

    Returns:
        np.array: Command torques
    """
    # Update state
    self.update()

    # Update interpolated action if necessary
    desired_pos = None
    rotation = None
    update_velocity_goal = False

    # Update interpolated goals if active
    if self.interpolator_pos is not None:
        if self.interpolator_pos.order == 1:
            # Linear case
            desired_pos = self.interpolator_pos.get_interpolated_goal()
        else:
            # Nonlinear case not currently supported
            pass
        update_velocity_goal = True
    else:
        desired_pos = self.reference_target_pos

    if self.interpolator_ori is not None:
        if self.interpolator_ori.order == 1:
            # Linear case: relative orientation based on difference between current ori and ref
            self.relative_ori = orientation_error(self.ee_ori_mat, self.ori_ref)
            ori_error = self.interpolator_ori.get_interpolated_goal()
            rotation = T.quat2mat(ori_error)
        else:
            # Nonlinear case not currently supported
            pass
        update_velocity_goal = True
    else:
        rotation = T.quat2mat(self.reference_target_orn)

    # Only update the velocity goals if we're interpolating
    if update_velocity_goal:
        velocities = self.get_control(dpos=(desired_pos - self.ee_pos), rotation=rotation)
        super().set_goal(velocities)

    # Run controller with given action
    return super().run_controller()
def rotate_camera(self, point, axis, angle):
    """
    Rotate the camera view about a direction (in the camera frame).

    Args:
        point (None or 3-array): (x,y,z) cartesian coordinates about which to rotate camera in camera frame.
            If None, assumes the point is the current location of the camera
        axis (3-array): (ax,ay,az) axis about which to rotate camera in camera frame
        angle (float): how much to rotate about that direction

    Returns:
        2-tuple:
            pos: (x,y,z) updated camera position
            quat: (x,y,z,w) updated camera quaternion orientation
    """
    # current camera rotation + pos
    camera_pos = np.array(self.env.sim.data.get_mocap_pos(self.mover_body_name))
    camera_rot = T.quat2mat(T.convert_quat(self.env.sim.data.get_mocap_quat(self.mover_body_name), to="xyzw"))

    # rotate by angle and direction to get new camera rotation
    rad = np.pi * angle / 180.0
    R = T.rotation_matrix(rad, axis, point=point)
    camera_pose = np.zeros((4, 4))
    camera_pose[:3, :3] = camera_rot
    camera_pose[:3, 3] = camera_pos
    camera_pose = camera_pose @ R

    # Update camera pose
    pos, quat = camera_pose[:3, 3], T.mat2quat(camera_pose[:3, :3])
    self.set_camera_pose(pos=pos, quat=quat)
    return pos, quat
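# A minimal sketch of the same pose composition without a simulator, assuming
# robosuite's transform_utils is importable; the pose values are made up.
import numpy as np
import robosuite.utils.transform_utils as T

camera_pos = np.array([1.0, 0.0, 1.5])
camera_rot = np.eye(3)  # camera initially axis-aligned

rad = np.pi * 45.0 / 180.0                         # rotate 45 degrees...
R = T.rotation_matrix(rad, [0, 0, 1], point=None)  # ...about the camera z axis

camera_pose = np.eye(4)
camera_pose[:3, :3] = camera_rot
camera_pose[:3, 3] = camera_pos
camera_pose = camera_pose @ R  # right-multiply: rotation applied in the camera frame

print(camera_pose[:3, 3], T.mat2quat(camera_pose[:3, :3]))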
def ik_robot_eef_joint_cartesian_pose(self):
    """
    Calculates the current cartesian pose of the last joint of the ik robot with respect to the
    base frame as a (pos, orn) tuple where orn is a x-y-z-w quaternion

    Returns:
        2-tuple:
            - (np.array) position
            - (np.array) orientation
    """
    eef_pos_in_world = np.array(p.getLinkState(self.ik_robot, self.bullet_ee_idx,
                                               physicsClientId=self.bullet_server_id)[0])
    eef_orn_in_world = np.array(p.getLinkState(self.ik_robot, self.bullet_ee_idx,
                                               physicsClientId=self.bullet_server_id)[1])
    eef_pose_in_world = T.pose2mat((eef_pos_in_world, eef_orn_in_world))

    base_pos_in_world = np.array(p.getBasePositionAndOrientation(self.ik_robot,
                                                                 physicsClientId=self.bullet_server_id)[0])
    base_orn_in_world = np.array(p.getBasePositionAndOrientation(self.ik_robot,
                                                                 physicsClientId=self.bullet_server_id)[1])
    base_pose_in_world = T.pose2mat((base_pos_in_world, base_orn_in_world))
    world_pose_in_base = T.pose_inv(base_pose_in_world)

    # Update reference to inverse orientation offset from pybullet base frame to world frame
    self.base_orn_offset_inv = T.quat2mat(T.quat_inverse(base_orn_in_world))

    # Update reference target orientation
    self.reference_target_orn = T.quat_multiply(self.reference_target_orn, base_orn_in_world)

    eef_pose_in_base = T.pose_in_A_to_pose_in_B(pose_A=eef_pose_in_world, pose_A_in_B=world_pose_in_base)

    return T.mat2pose(eef_pose_in_base)
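# A minimal sketch of the world -> base frame change used above, assuming
# robosuite's transform_utils is importable; the poses below are made up.
import numpy as np
import robosuite.utils.transform_utils as T

# end effector pose and robot base pose, both expressed in the world frame (quats are xyzw)
eef_pose_in_world = T.pose2mat((np.array([0.5, 0.0, 1.0]), np.array([0.0, 0.0, 0.0, 1.0])))
base_pose_in_world = T.pose2mat((np.array([0.0, 0.0, 0.5]), np.array([0.0, 0.0, 0.7071068, 0.7071068])))

# invert the base pose to get the world frame expressed in the base frame
world_pose_in_base = T.pose_inv(base_pose_in_world)

# re-express the end effector pose in the base frame
eef_pose_in_base = T.pose_in_A_to_pose_in_B(pose_A=eef_pose_in_world, pose_A_in_B=world_pose_in_base)
pos, orn = T.mat2pose(eef_pose_in_base)
print(pos, orn)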
def _post_action(self, action):
    """
    Run any necessary visualization after running the action

    Args:
        action (np.array): Action being passed during this timestep

    Returns:
        3-tuple:
            - (float) reward from the environment
            - (bool) whether the current episode is completed or not
            - (dict) empty dict to be filled with information by subclassed method
    """
    reward, done, info = super()._post_action(action)
    chassis_body_id = self.sim.model.body_name2id(self.robots[0].robot_model.robot_base)
    body_pos_z = self.sim.data.body_xpos[chassis_body_id][2]
    quat = np.array([self.sim.data.qpos[x] for x in self.robots[0]._ref_chassis_pos_indexes])[3:]
    mat = quat2mat(quat)
    euler = mat2euler(mat)
    roll, pitch = euler[:2]
    # terminate with a penalty if the chassis has rolled or pitched past ~45 deg
    # (0.785 rad) or dropped below 0.20 m
    if abs(roll) > 0.785 or abs(pitch) > 0.785 or body_pos_z < 0.20:
        done = True
        reward = -10
    return reward, done, info
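# A minimal sketch of the tip-over check on a standalone quaternion, assuming
# robosuite's transform_utils provides quat2mat / mat2euler; the value is made up.
import numpy as np
from robosuite.utils.transform_utils import quat2mat, mat2euler

quat = np.array([0.4226183, 0.0, 0.0, 0.9063078])  # 50 deg roll about x (xyzw)
roll, pitch, _ = mat2euler(quat2mat(quat))
tipped_over = abs(roll) > 0.785 or abs(pitch) > 0.785
print(roll, pitch, tipped_over)  # roll ~0.873 rad -> True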
def _get_initial_qpos(self):
    """
    Calculates the initial joint position for the robot based on the initial desired pose
    (self.traj_pt, self.goal_quat).

    Returns:
        (np.array): n joint positions
    """
    pos = self._convert_robosuite_to_toolbox_xpos(self.traj_pt)
    ori_euler = mat2euler(quat2mat(self.goal_quat))

    # desired pose (named T_des to avoid shadowing the transform_utils module T)
    T_des = SE3(pos) * SE3.RPY(ori_euler)

    # find initial joint positions
    if self.robots[0].name == "UR5e":
        robot = rtb.models.DH.UR5()
        sol = robot.ikine_min(T_des, q0=self.robots[0].init_qpos)

        # flip last joint around (pi)
        sol.q[-1] -= np.pi
        return sol.q
    elif self.robots[0].name == "Panda":
        robot = rtb.models.DH.Panda()
        sol = robot.ikine_min(T_des, q0=self.robots[0].init_qpos)
        return sol.q
def get_pose(self):
    offset_mat = np.eye(4)
    # offset_ori is stored as (x, y, z, w); reorder to (w, x, y, z) for this codebase's quat2mat
    q_wxyz = np.concatenate((self.offset_ori[3:], self.offset_ori[:3]))
    offset_mat[:3, :3] = T.quat2mat(q_wxyz)
    offset_mat[:3, -1] = self.offset_pos
    if self.camera_link_name != "worldbody":
        pos_body_in_world = self.mujoco_env.sim.data.get_body_xpos(self.camera_link_name)
        rot_body_in_world = self.mujoco_env.sim.data.get_body_xmat(self.camera_link_name).reshape((3, 3))
        pose_body_in_world = T.make_pose(pos_body_in_world, rot_body_in_world)
        total_pose = np.array(pose_body_in_world).dot(np.array(offset_mat))
        position = total_pose[:3, -1]
        rot = total_pose[:3, :3]
        wxyz = T.mat2quat(rot)
        xyzw = np.concatenate((wxyz[1:], wxyz[:1]))
    else:
        position = np.array(self.offset_pos)
        xyzw = self.offset_ori
    return np.concatenate((position, xyzw))
def cons_1(self, x):
    # First pose: constrain peg further than lPeg in z direction relative to hole
    p_peg = x[0:3]
    p_hole = x[3:6]
    q_hole = x[10:14]

    # express the peg position in the hole frame
    p_peg_in_hole_frame = np.matmul(T.quat2mat(T.quat_inverse(q_hole)), p_peg - p_hole)
    return p_peg_in_hole_frame[2]
def _randomize_rotation(self, name):
    """
    Helper function to randomize orientation of a specific camera

    Args:
        name (str): Name of the camera to randomize for
    """
    # sample a small, random axis-angle delta rotation
    random_axis, random_angle = trans.random_axis_angle(
        angle_limit=self.rotation_perturbation_size, random_state=self.random_state)
    random_delta_rot = trans.quat2mat(trans.axisangle2quat(random_axis * random_angle))

    # compute new rotation and set it
    base_rot = trans.quat2mat(trans.convert_quat(self._defaults[name]['quat'], to='xyzw'))
    new_rot = random_delta_rot.T.dot(base_rot)
    new_quat = trans.convert_quat(trans.mat2quat(new_rot), to='wxyz')
    self.set_quat(
        name,
        new_quat,
    )
def _hammer_angle(self):
    """
    Calculate the angle of hammer with the ground, relative to it resting horizontally

    Returns:
        float: angle in radians
    """
    mat = T.quat2mat(self._hammer_quat)
    z_unit = [0, 0, 1]
    z_rotated = np.matmul(mat, z_unit)
    return np.pi / 2 - np.arccos(np.dot(z_unit, z_rotated))
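# A minimal sketch of the same tilt computation on a standalone quaternion,
# assuming robosuite's transform_utils is importable; the quaternion is made up.
import numpy as np
import robosuite.utils.transform_utils as T

quat = np.array([0.0, 0.2588190, 0.0, 0.9659258])  # 30 deg pitch about y (xyzw)
z_rotated = T.quat2mat(quat) @ np.array([0.0, 0.0, 1.0])  # body z axis in world frame
angle = np.pi / 2 - np.arccos(np.clip(np.dot([0, 0, 1], z_rotated), -1.0, 1.0))
print(np.degrees(angle))  # ~60: the body z axis sits 60 deg above horizontal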
def _make_input(self, action, old_quat):
    """
    Helper function that returns a dictionary with keys dpos, rotation from a raw input
    array. The first three elements are taken to be displacement in position, and a
    quaternion indicating the change in rotation with respect to @old_quat.
    """
    return {
        "dpos": action[:3],
        # IK controller takes an absolute orientation in robot base frame
        "rotation": T.quat2mat(T.quat_multiply(old_quat, action[3:7])),
    }
def cons_2(self, x):
    # Second pose: constrain peg further than lPeg in z direction relative to hole
    # also constrain peg x and y in hole frame to be below a tolerance
    # also constrain rotation error
    ind_offset = 14  # ind at which to start looking for pose 2 info
    p_peg = x[ind_offset + 0:ind_offset + 3]
    p_hole = x[ind_offset + 3:ind_offset + 6]
    q_peg = x[ind_offset + 6:ind_offset + 10]
    q_hole = x[ind_offset + 10:ind_offset + 14]

    p_peg_in_hole_frame = np.matmul(T.quat2mat(T.quat_inverse(q_hole)), p_peg - p_hole)
    z_hole = np.matmul(T.quat2mat(q_hole), np.array([0, 0, 1]))  # hole z in world frame
    z_peg = np.matmul(T.quat2mat(q_peg), np.array([0, 0, 1]))  # peg z in world frame

    # Want the z axes to be antiparallel
    z_defect = np.linalg.norm(z_hole + z_peg)
    return np.hstack((p_peg_in_hole_frame, z_defect))
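# A minimal sketch of the antiparallel z-axis "defect" term, assuming robosuite's
# transform_utils is importable; the two quaternions below are made up.
import numpy as np
import robosuite.utils.transform_utils as T

q_hole = np.array([0.0, 0.0, 0.0, 1.0])  # identity: hole z points up (xyzw)
q_peg = np.array([1.0, 0.0, 0.0, 0.0])   # 180 deg about x: peg z points down

z_hole = T.quat2mat(q_hole) @ np.array([0.0, 0.0, 1.0])
z_peg = T.quat2mat(q_peg) @ np.array([0.0, 0.0, 1.0])

# zero when the axes are exactly antiparallel, up to 2 when parallel
print(np.linalg.norm(z_hole + z_peg))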
def cons_3(self, x):
    # Third pose: constrain peg less than lPeg/2 in z direction relative to hole
    # also constrain peg x and y in hole frame to be below a tolerance
    # also constrain same orientations as in pose 2
    last_ind_offset = 14  # ind at which to start looking for pose 2 info
    ind_offset = 28  # ind at which to start looking for pose 3 info
    p_peg = x[ind_offset + 0:ind_offset + 3]
    p_hole = x[ind_offset + 3:ind_offset + 6]

    # Using the quat from pose 2 because assuming it will stay the same
    # (the hole quaternion is the 4 elements starting at last_ind_offset + 10)
    q_hole = x[last_ind_offset + 10:last_ind_offset + 14]

    p_peg_in_hole_frame = np.matmul(T.quat2mat(T.quat_inverse(q_hole)), p_peg - p_hole)
    return np.hstack(p_peg_in_hole_frame)
def update_obs(self, obs, F_int):
    # Update observed values
    self.eef_pos = obs['robot0_eef_pos']  # array 1x3
    self.eef_quat = obs['robot0_eef_quat']  # array 1x4
    # swap the x and y components and negate z (tuple assignment keeps both values)
    self.eef_quat[0], self.eef_quat[1] = self.eef_quat[1], self.eef_quat[0]
    self.eef_quat[2] = -self.eef_quat[2]
    self.eef_vel = obs['robot0_gripper_qvel'][0:3]
    self.peg_pos = obs['peg_pos'] + quat2mat(obs['peg_quat']) @ np.array([0, 0, 0.025])  # array 1x3
    self.peg_quat = obs['peg_quat']  # array 1x4
    self.eef_to_peg_pos = self.peg_pos - self.eef_pos  # array 1x3
    self.eef_to_peg_quat = self.peg_quat - self.eef_quat  # array 1x4 (elementwise difference)
    self.hole_pos = obs['plate_pos'] + quat2mat(obs['plate_quat']) @ np.array([0.155636, 0.1507, 0.1])  # array 1x3
    self.eef_to_hole_pos = self.hole_pos - self.eef_pos  # array 1x3
    self.F_int = F_int

    # decide status based on the observation
    record = self.action_status
    self.decide_status()
    if record != self.action_status or record != self.init:
        self.change_inital_conditions()
        self.init = 0
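# A minimal sketch of the component remap above on a standalone array: the
# sequential form loses the original x component, the tuple form does not.
import numpy as np

q = np.array([0.1, 0.2, 0.3, 0.9])

q_seq = q.copy()
q_seq[0] = q_seq[1]
q_seq[1] = q_seq[0]  # no-op: q_seq[0] was already overwritten

q_swap = q.copy()
q_swap[0], q_swap[1] = q_swap[1], q_swap[0]  # simultaneous swap
q_swap[2] = -q_swap[2]

print(q_seq)   # [0.2 0.2 0.3 0.9]
print(q_swap)  # [ 0.2  0.1 -0.3  0.9]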
def update(self, force=False):
    """
    Updates the state of the robot arm, including end effector pose / orientation / velocity,
    joint pos/vel, jacobian, and mass matrix.

    By default, since this is a non-negligible computation, multiple redundant calls will be
    ignored via the self.new_update attribute flag. However, if the @force flag is set, the
    update will occur regardless of the state of self.new_update.

    This base class method of @run_controller resets the self.new_update flag

    Args:
        force (bool): Whether to force an update to occur or not
    """
    # TODO (chongyi zheng): update controller state with ROS
    # Only run update if self.new_update or force flag is set
    if self.new_update or force:
        # self.sim.forward()

        # self.ee_pos = np.array(self.sim.data.site_xpos[self.sim.model.site_name2id(self.eef_name)])
        # self.ee_ori_mat = np.array(self.sim.data.site_xmat[self.sim.model.site_name2id(self.eef_name)].reshape([3, 3]))
        # self.ee_pos_vel = np.array(self.sim.data.site_xvelp[self.sim.model.site_name2id(self.eef_name)])
        # self.ee_ori_vel = np.array(self.sim.data.site_xvelr[self.sim.model.site_name2id(self.eef_name)])
        # TODO (chongyi zheng): Do we need this?
        eef_pose = self.sim.get_eef_pose(self.eef_name)
        self.ee_pos = np.array(eef_pose[0])
        self.ee_ori_mat = T.quat2mat(np.array(eef_pose[1]))
        ee_vel = self.sim.get_eef_vel(self.eef_name, self.joint_index)
        self.ee_pos_vel = np.array(ee_vel[0])
        self.ee_ori_vel = np.array(ee_vel[1])

        self.joint_pos = np.array(self.sim.get_joint_pos(joint_index=self.joint_index))
        self.joint_vel = np.array(self.sim.get_joint_vel(joint_index=self.joint_index))

        # self.J_pos = np.array(self.sim.data.get_site_jacp(self.eef_name).reshape((3, -1))[:, self.qvel_index])
        # self.J_ori = np.array(self.sim.data.get_site_jacr(self.eef_name).reshape((3, -1))[:, self.qvel_index])
        # self.J_full = np.array(np.vstack([self.J_pos, self.J_ori]))
        jac = self.sim.get_jacobian(self.eef_name, raw=True)
        self.J_pos = np.array(jac[0][:, self.qvel_index])
        self.J_ori = np.array(jac[1][:, self.qvel_index])
        self.J_full = np.array(jac[2][:, self.qvel_index])

        # mass_matrix = np.ndarray(shape=(len(self.sim.data.qvel) ** 2,), dtype=np.float64, order='C')
        # mujoco_py.cymj._mj_fullM(self.sim.model, mass_matrix, self.sim.data.qM)
        # mass_matrix = np.reshape(mass_matrix, (len(self.sim.data.qvel), len(self.sim.data.qvel)))
        # self.mass_matrix = mass_matrix[self.qvel_index, :][:, self.qvel_index]
        self.mass_matrix = np.array(self.sim.get_mass_matrix())[self.qvel_index, :][:, self.qvel_index]

        # Clear self.new_update
        self.new_update = False
def _randomize_direction(self, name):
    """
    Helper function to randomize direction of a specific light source

    Args:
        name (str): Name of the lighting source to randomize for
    """
    # sample a small, random axis-angle delta rotation
    random_axis, random_angle = trans.random_axis_angle(
        angle_limit=self.direction_perturbation_size, random_state=self.random_state)
    random_delta_rot = trans.quat2mat(trans.axisangle2quat(random_axis * random_angle))

    # rotate direction by this delta rotation and set the new direction
    new_dir = random_delta_rot.dot(self._defaults[name]['dir'])
    self.set_dir(
        name,
        new_dir,
    )
def move_camera(env, direction, scale, camera_id):
    """
    Move the camera view along a direction (in the camera frame).

    :param direction: a 3-dim numpy array for where to move camera in camera frame
    :param scale: a float for how much to move along that direction
    :param camera_id: which camera to modify
    """
    # current camera pose
    camera_pos = np.array(env.sim.data.get_mocap_pos("cameramover"))
    camera_rot = T.quat2mat(T.convert_quat(env.sim.data.get_mocap_quat("cameramover"), to='xyzw'))

    # move along camera frame axis and set new position
    camera_pos += scale * camera_rot.dot(direction)
    env.sim.data.set_mocap_pos("cameramover", camera_pos)
    env.sim.forward()
def reward(self, action=None):
    """
    Reward function for the task.

    Dense un-normalized reward penalizing:

        - Height error: squared deviation of the chassis height from 0.37 m
        - Control effort: squared norm of the torque actuator commands
        - Pitch: absolute pitch angle of the chassis
        - Drift: squared displacement of the chassis in the x-y plane

    Note that the final reward is scaled by reward_scale if it is specified.

    Args:
        action (np array): [NOT USED]

    Returns:
        float: reward value
    """
    chassis_body_id = self.sim.model.body_name2id(self.robots[0].robot_model.robot_base)
    body_pos = self.sim.data.body_xpos[chassis_body_id]
    quat = np.array([self.sim.data.qpos[x] for x in self.robots[0]._ref_chassis_pos_indexes])[3:]
    mat = quat2mat(quat)
    euler = mat2euler(mat)
    pitch = euler[1]
    ctrl_norm = np.linalg.norm(self.sim.data.ctrl[self.robots[0]._ref_joint_torq_actuator_indexes])
    reward = -1 * ((10 * (body_pos[2] - 0.37)) ** 2) - 0.03 * (ctrl_norm ** 2) - 3 * abs(pitch) \
        - 0.2 * ((10 * body_pos[0]) ** 2 + (10 * body_pos[1]) ** 2)

    # Scale reward if requested
    if self.reward_scale is not None:
        reward *= self.reward_scale

    return reward
def move_camera(env, direction, scale, cam_body_id):
    """
    Move the camera view along a direction (in the camera frame).

    Args:
        direction (np.array): 3-array for where to move camera in camera frame
        scale (float): how much to move along that direction
        cam_body_id (int): id corresponding to parent body of camera element
    """
    # current camera pose
    camera_pos = np.array(env.sim.model.body_pos[cam_body_id])
    camera_rot = T.quat2mat(T.convert_quat(env.sim.model.body_quat[cam_body_id], to='xyzw'))

    # move along camera frame axis and set new position
    camera_pos += scale * camera_rot.dot(direction)
    env.sim.model.body_pos[cam_body_id] = camera_pos
    env.sim.forward()
def move_camera(self, direction, scale):
    """
    Move the camera view along a direction (in the camera frame).

    Args:
        direction (3-array): direction vector for where to move camera in camera frame
        scale (float): how much to move along that direction
    """
    # current camera rotation + pos
    camera_pos = np.array(self.env.sim.data.get_mocap_pos(self.mover_body_name))
    camera_quat = self.env.sim.data.get_mocap_quat(self.mover_body_name)
    camera_rot = T.quat2mat(T.convert_quat(camera_quat, to="xyzw"))

    # move along camera frame axis and set new position
    camera_pos += scale * camera_rot.dot(direction)
    self.set_camera_pose(pos=camera_pos)
    return camera_pos, camera_quat
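# A minimal sketch of moving a pose along its own (camera-frame) axis, assuming
# robosuite's transform_utils is importable; the pose values are made up.
import numpy as np
import robosuite.utils.transform_utils as T

camera_pos = np.array([1.0, 0.0, 1.5])
camera_quat = np.array([0.0, 0.0, 0.7071068, 0.7071068])  # 90 deg about z (xyzw)
camera_rot = T.quat2mat(camera_quat)

# moving 0.1 along the camera's local x axis moves along world y here
camera_pos = camera_pos + 0.1 * camera_rot.dot(np.array([1.0, 0.0, 0.0]))
print(camera_pos)  # [1.  0.1 1.5]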
def update_obs(self, obs):
    # Update observed values
    self.eef_pos = obs['robot0_eef_pos']  # array 1x3
    self.eef_quat = obs['robot0_eef_quat']  # array 1x4
    # swap the x and y components and negate z (tuple assignment keeps both values)
    self.eef_quat[0], self.eef_quat[1] = self.eef_quat[1], self.eef_quat[0]
    self.eef_quat[2] = -self.eef_quat[2]
    self.eef_vel = obs['robot0_gripper_qvel'][0:3]
    self.peg_pos = obs['peg_pos'] + quat2mat(obs['peg_quat']) @ np.array([0, 0, 0.015])  # array 1x3
    self.peg_quat = obs['peg_quat']  # array 1x4
    self.eef_to_peg_pos = self.peg_pos - self.eef_pos  # array 1x3
    self.eef_to_peg_quat = self.peg_quat - self.eef_quat  # array 1x4 (elementwise difference)
    self.hole_pos = obs['plate_pos']  # + quat2mat(obs['plate_quat']) @ np.array([0.155636, 0.1507, 0.1])  # array 1x3
    self.eef_to_hole_pos = self.hole_pos - self.eef_pos  # array 1x3
    self.hole_to_peg_pos = self.peg_pos - self.hole_pos  # array 1x3

    # decide status based on the observation
    self.decide_status()
def _make_input(self, action, old_quat):
    """
    Helper function that returns a dictionary with keys dpos, rotation from a raw input
    array. The first three elements are taken to be displacement in position, and a
    quaternion indicating the change in rotation with respect to @old_quat. Additionally
    clips @action as well

    Args:
        action (np.array): should have form: [dx, dy, dz, ax, ay, az]
            (orientation in scaled axis-angle form)
        old_quat (np.array): the old target quaternion that will be updated with the
            relative change in @action
    """
    # Clip action appropriately
    dpos, rotation = self._clip_ik_input(action[:3], action[3:])

    # Update reference targets
    self.reference_target_pos += dpos * self.user_sensitivity
    self.reference_target_orn = T.quat_multiply(old_quat, rotation)

    return {"dpos": dpos * self.user_sensitivity, "rotation": T.quat2mat(rotation)}
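# A minimal sketch of applying a relative rotation quaternion to a target
# quaternion, mirroring the update in _make_input above, assuming robosuite's
# transform_utils is importable; the values below are made up.
import numpy as np
import robosuite.utils.transform_utils as T

old_quat = np.array([0.0, 0.0, 0.0, 1.0])              # current target (xyzw)
rotation = np.array([0.0, 0.0, 0.0871557, 0.9961947])  # 10 deg about z

new_quat = T.quat_multiply(old_quat, rotation)  # compose delta with old target
print(new_quat)
print(T.quat2mat(rotation))  # the same delta as a rotation matrix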
def rotate_camera(env, direction, angle, camera_id):
    """
    Rotate the camera view about a direction (in the camera frame).

    :param direction: a 3-dim numpy array for the axis to rotate the camera about, in camera frame
    :param angle: a float for how much to rotate about that direction
    :param camera_id: which camera to modify
    """
    # current camera rotation
    camera_rot = T.quat2mat(T.convert_quat(env.sim.data.get_mocap_quat("cameramover"), to='xyzw'))

    # rotate by angle and direction to get new camera rotation
    rad = np.pi * angle / 180.0
    R = T.rotation_matrix(rad, direction, point=None)
    camera_rot = camera_rot.dot(R[:3, :3])

    # set new rotation
    env.sim.data.set_mocap_quat("cameramover", T.convert_quat(T.mat2quat(camera_rot), to='wxyz'))
    env.sim.forward()
def rotate_camera(env, direction, angle, cam_body_id):
    """
    Rotate the camera view about a direction (in the camera frame).

    Args:
        direction (np.array): 3-array for the axis to rotate the camera about, in camera frame
        angle (float): how much to rotate about that direction
        cam_body_id (int): id corresponding to parent body of camera element
    """
    # current camera rotation
    camera_pos = np.array(env.sim.model.body_pos[cam_body_id])
    camera_rot = T.quat2mat(T.convert_quat(env.sim.model.body_quat[cam_body_id], to='xyzw'))

    # rotate by angle and direction to get new camera rotation
    rad = np.pi * angle / 180.0
    R = T.rotation_matrix(rad, direction, point=None)
    camera_rot = camera_rot.dot(R[:3, :3])

    # set new rotation
    env.sim.model.body_quat[cam_body_id] = T.convert_quat(T.mat2quat(camera_rot), to='wxyz')
    env.sim.forward()
def get_interpolated_goal(self):
    """
    Provides the next step in interpolation given the remaining steps.

    NOTE: If this interpolator is for orientation, it is assumed to be receiving either
    euler angles or quaternions

    Returns:
        np.array: Next position in the interpolated trajectory
    """
    # Grab start position
    x = np.array(self.start)

    # Calculate the desired next step based on remaining interpolation steps
    if self.ori_interpolate is not None:
        # This is an orientation interpolation, so we interpolate linearly around a sphere instead
        goal = np.array(self.goal)
        if self.ori_interpolate == "euler":
            # this is assumed to be euler angles (x,y,z), so we need to first map to quat
            x = T.mat2quat(T.euler2mat(x))
            goal = T.mat2quat(T.euler2mat(self.goal))

        # Interpolate to the next sequence
        x_current = T.quat_slerp(x, goal, fraction=(self.step + 1) / self.total_steps)
        if self.ori_interpolate == "euler":
            # Map back to euler
            x_current = T.mat2euler(T.quat2mat(x_current))
    else:
        # This is a normal interpolation
        dx = (self.goal - x) / (self.total_steps - self.step)
        x_current = x + dx

    # Increment step if there's still steps remaining based on ramp ratio
    if self.step < self.total_steps - 1:
        self.step += 1

    # Return the new interpolated step
    return x_current
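# A minimal sketch contrasting linear interpolation for position with quaternion
# slerp for orientation over a fixed number of steps, assuming robosuite's
# transform_utils is importable; the endpoints are made up.
import numpy as np
import robosuite.utils.transform_utils as T

total_steps = 5
start_q = np.array([0.0, 0.0, 0.0, 1.0])             # identity (xyzw)
goal_q = np.array([0.0, 0.0, 0.7071068, 0.7071068])  # 90 deg about z

start_p = np.zeros(3)
goal_p = np.array([0.3, 0.0, 0.1])

for step in range(total_steps):
    frac = (step + 1) / total_steps
    q = T.quat_slerp(start_q, goal_q, fraction=frac)  # spherical for orientation
    p = start_p + frac * (goal_p - start_p)           # linear for position
    print(frac, p, q)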
def reset(self, deterministic=False):
    """
    Sets initial pose of arm and grippers. Overrides robot joint configuration if we're using a
    deterministic reset (e.g.: hard reset from xml file)

    Args:
        deterministic (bool): If true, will not randomize initializations within the sim

    Raises:
        ValueError: [Invalid noise type]
    """
    init_qpos = np.array(self.init_qpos)
    if not deterministic:
        # Determine noise
        if self.initialization_noise["type"] == "gaussian":
            noise = np.random.randn(len(self.init_qpos)) * self.initialization_noise["magnitude"]
        elif self.initialization_noise["type"] == "uniform":
            noise = np.random.uniform(-1.0, 1.0, len(self.init_qpos)) * self.initialization_noise["magnitude"]
        else:
            raise ValueError("Error: Invalid noise type specified. Options are 'gaussian' or 'uniform'.")
        init_qpos += noise

    # TODO (chongyi zheng): move to initial position with moveit_commander!
    # Set initial position in sim
    # self.sim.data.qpos[self._ref_joint_pos_indexes] = init_qpos
    # init_joint_positions = dict(zip(self.robot_joints, init_qpos))
    # self.sim.goto_arm_positions(init_joint_positions, wait=True)
    self.sim.set_joint_qpos(self.robot_joints, init_qpos)

    # Load controllers
    self._load_controller()

    # TODO (chongyi zheng): Do we need this?
    # Update base pos / ori references
    # self.base_pos = self.sim.data.get_body_xpos(self.robot_model.robot_base)
    # self.base_ori = T.mat2quat(self.sim.data.get_body_xmat(self.robot_model.robot_base).reshape((3, 3)))
    base_pose = self.sim.get_body_pose(self.robot_model.robot_base)
    self.base_pos = np.array(base_pose[0])
    self.base_ori = T.quat2mat(base_pose[1])
def set_goal(self, action, set_pos=None, set_ori=None):
    """
    Sets goal based on input @action. If self.impedance_mode is not "fixed", then the input will
    be parsed into the delta values to update the goal position / pose and the kp and/or
    damping_ratio values to be immediately updated internally before executing the proceeding
    control loop.

    Note that @action is expected to be in the following format, based on impedance mode!

        :Mode `'fixed'`: [joint pos command]
        :Mode `'variable'`: [damping_ratio values, kp values, joint pos command]
        :Mode `'variable_kp'`: [kp values, joint pos command]

    Args:
        action (Iterable): Desired relative joint position goal state
        set_pos (Iterable): If set, overrides @action and sets the desired absolute eef position goal state
        set_ori (Iterable): If set, overrides @action and sets the desired absolute eef orientation goal state
    """
    # Update state
    self.update()

    # Parse action based on the impedance mode, and update kp / kd as necessary
    if self.impedance_mode == "variable":
        damping_ratio, kp, delta = action[:6], action[6:12], action[12:]
        self.kp = np.clip(kp, self.kp_min, self.kp_max)
        self.kd = 2 * np.sqrt(self.kp) * np.clip(damping_ratio, self.damping_ratio_min, self.damping_ratio_max)
    elif self.impedance_mode == "variable_kp":
        kp, delta = action[:6], action[6:]
        self.kp = np.clip(kp, self.kp_min, self.kp_max)
        self.kd = 2 * np.sqrt(self.kp)  # critically damped
    else:  # This is case "fixed"
        delta = action

    # If we're using deltas, interpret actions as such
    if self.use_delta:
        if delta is not None:
            scaled_delta = self.scale_action(delta)
            if not self.use_ori:
                # Set default control for ori since user isn't actively controlling ori
                set_ori = np.array([[-1., 0., 0.], [0., 1., 0.], [0., 0., -1.]])
        else:
            scaled_delta = []
    # Else, interpret actions as absolute values
    else:
        set_pos = delta[:3]
        # Set default control for ori if we're only using position control
        set_ori = T.quat2mat(T.axisangle2quat(delta[3:6])) if self.use_ori else \
            np.array([[-1., 0., 0.], [0., 1., 0.], [0., 0., -1.]])
        scaled_delta = []

    # We only want to update goal orientation if there is a valid delta ori value
    # use math.isclose instead of numpy because numpy is slow
    bools = [0. if math.isclose(elem, 0.) else 1. for elem in scaled_delta[3:]]
    if sum(bools) > 0.:
        self.goal_ori = set_goal_orientation(
            scaled_delta[3:], self.ee_ori_mat, orientation_limit=self.orientation_limits, set_ori=set_ori)
    self.goal_pos = set_goal_position(
        scaled_delta[:3], self.ee_pos, position_limit=self.position_limits, set_pos=set_pos)

    if self.interpolator_pos is not None:
        self.interpolator_pos.set_goal(self.goal_pos)

    if self.interpolator_ori is not None:
        self.ori_ref = np.array(self.ee_ori_mat)  # reference is the current orientation at start
        self.interpolator_ori.set_goal(
            orientation_error(self.goal_ori, self.ori_ref))  # goal is the total orientation error
        self.relative_ori = np.zeros(3)  # relative orientation always starts at 0
def reward(self, action=None):
    """
    Reward function for the task.

    Sparse un-normalized reward:

        - a discrete reward of 3.0 is provided if the pot is lifted and is parallel within 30 deg to the table

    Un-normalized summed components if using reward shaping:

        - Reaching: in [0, 0.5], per-arm component that is proportional to the distance between each arm and its
          respective pot handle, and exactly 0.5 when grasping the handle
          - Note that the agent only gets the lifting reward when flipping no more than 30 degrees.
        - Grasping: in {0, 0.25}, binary per-arm component awarded if the gripper is grasping its correct handle
        - Lifting: in [0, 1.5], proportional to the pot's height above the table, and capped at a certain threshold

    Note that the final reward is normalized and scaled by reward_scale / 3.0 as well so that the max score is
    equal to reward_scale

    Args:
        action (np array): [NOT USED]

    Returns:
        float: reward value
    """
    reward = 0

    # check if the pot is tilted more than 30 degrees
    mat = T.quat2mat(self._pot_quat)
    z_unit = [0, 0, 1]
    z_rotated = np.matmul(mat, z_unit)
    cos_z = np.dot(z_unit, z_rotated)
    cos_30 = np.cos(np.pi / 6)
    direction_coef = 1 if cos_z >= cos_30 else 0

    # check for goal completion: pot is higher than the table top above a margin
    if self._check_success():
        reward = 3.0 * direction_coef

    # use a shaping reward
    elif self.reward_shaping:
        # lifting reward
        pot_bottom_height = self.sim.data.site_xpos[self.pot_center_id][2] - self.pot.top_offset[2]
        table_height = self.sim.data.site_xpos[self.table_top_id][2]
        elevation = pot_bottom_height - table_height
        r_lift = min(max(elevation - 0.05, 0), 0.15)
        reward += 10. * direction_coef * r_lift

        _gripper0_to_handle0 = self._gripper0_to_handle0
        _gripper1_to_handle1 = self._gripper1_to_handle1

        # gh stands for gripper-handle
        # When grippers are far away, tell them to be closer

        # Get contacts
        (g0, g1) = (self.robots[0].gripper["right"], self.robots[0].gripper["left"]) if \
            self.env_configuration == "bimanual" else (self.robots[0].gripper, self.robots[1].gripper)

        _g0h_dist = np.linalg.norm(_gripper0_to_handle0)
        _g1h_dist = np.linalg.norm(_gripper1_to_handle1)

        # Grasping reward
        if self._check_grasp(gripper=g0, object_geoms=self.pot.handle0_geoms):
            reward += 0.25

        # Reaching reward
        reward += 0.5 * (1 - np.tanh(10.0 * _g0h_dist))

        # Grasping reward
        if self._check_grasp(gripper=g1, object_geoms=self.pot.handle1_geoms):
            reward += 0.25

        # Reaching reward
        reward += 0.5 * (1 - np.tanh(10.0 * _g1h_dist))

    if self.reward_scale is not None:
        reward *= self.reward_scale / 3.0
    return reward
def set_goal_orientation(delta, current_orientation, orientation_limit=None, set_ori=None):
    """
    Calculates and returns the desired goal orientation, clipping the result accordingly to
    @orientation_limits. @delta and @current_orientation must be specified if a relative goal
    is requested, else @set_ori must be an orientation matrix specified to define a global
    orientation

    Args:
        delta (np.array): Desired relative change in orientation, in axis-angle form [ax, ay, az]
        current_orientation (np.array): Current orientation, in rotation matrix form
        orientation_limit (None or np.array): 2d array defining the (min, max) limits of permissible
            orientation goal commands
        set_ori (None or np.array): If set, will ignore @delta and set the goal orientation to this value

    Returns:
        np.array: calculated goal orientation in absolute coordinates

    Raises:
        ValueError: [Invalid orientation_limit shape]
    """
    # directly set orientation
    if set_ori is not None:
        goal_orientation = set_ori

    # otherwise use delta to set goal orientation
    else:
        # convert axis-angle value to rotation matrix
        quat_error = trans.axisangle2quat(delta)
        rotation_mat_error = trans.quat2mat(quat_error)
        goal_orientation = np.dot(rotation_mat_error, current_orientation)

    # check for orientation limits
    if np.array(orientation_limit).any():
        if orientation_limit.shape != (2, 3):
            raise ValueError("Orientation limit should be shaped (2,3) "
                             "but is instead: {}".format(orientation_limit.shape))

        # Convert to euler angles for clipping
        euler = trans.mat2euler(goal_orientation)

        # Clip euler angles according to specified limits
        limited = False
        for idx in range(3):
            if orientation_limit[0][idx] < orientation_limit[1][idx]:  # Normal angle sector meaning
                if orientation_limit[0][idx] < euler[idx] < orientation_limit[1][idx]:
                    continue
                else:
                    limited = True
                    # wrap each distance into [-pi, pi] before comparing
                    dist_to_lower = euler[idx] - orientation_limit[0][idx]
                    if dist_to_lower > np.pi:
                        dist_to_lower -= 2 * np.pi
                    elif dist_to_lower < -np.pi:
                        dist_to_lower += 2 * np.pi

                    dist_to_higher = euler[idx] - orientation_limit[1][idx]
                    if dist_to_higher > np.pi:
                        dist_to_higher -= 2 * np.pi
                    elif dist_to_higher < -np.pi:
                        dist_to_higher += 2 * np.pi

                    # snap to the nearer limit
                    if dist_to_lower < dist_to_higher:
                        euler[idx] = orientation_limit[0][idx]
                    else:
                        euler[idx] = orientation_limit[1][idx]
            else:  # Inverted angle sector meaning
                if orientation_limit[0][idx] < euler[idx] or euler[idx] < orientation_limit[1][idx]:
                    continue
                else:
                    limited = True
                    dist_to_lower = euler[idx] - orientation_limit[0][idx]
                    if dist_to_lower > np.pi:
                        dist_to_lower -= 2 * np.pi
                    elif dist_to_lower < -np.pi:
                        dist_to_lower += 2 * np.pi

                    dist_to_higher = euler[idx] - orientation_limit[1][idx]
                    if dist_to_higher > np.pi:
                        dist_to_higher -= 2 * np.pi
                    elif dist_to_higher < -np.pi:
                        dist_to_higher += 2 * np.pi

                    if dist_to_lower < dist_to_higher:
                        euler[idx] = orientation_limit[0][idx]
                    else:
                        euler[idx] = orientation_limit[1][idx]
        if limited:
            goal_orientation = trans.euler2mat(np.array([euler[0], euler[1], euler[2]]))
    return goal_orientation
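# A minimal sketch of wrapping an angle difference into [-pi, pi], the step the
# clipping above relies on, written as a standalone helper for clarity.
import numpy as np

def wrap_to_pi(angle):
    """Wrap a scalar angle difference into the interval [-pi, pi]."""
    if angle > np.pi:
        angle -= 2 * np.pi
    elif angle < -np.pi:
        angle += 2 * np.pi
    return angle

print(wrap_to_pi(3.5))   # -> about -2.783
print(wrap_to_pi(-3.5))  # -> about 2.783
print(wrap_to_pi(1.0))   # -> 1.0 (unchanged)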
    camera_name='agentview',  # use "agentview" camera
    use_object_obs=True,      # include ground-truth object features in observations
    camera_depth=True,
    target_object='cereal')
env = IKWrapper(env)

# reset the environment
env.reset()

f, cx, cy = env._get_cam_K(CAM_ID)
K = np.array([[f, 0, cx], [0, f, cy], [0, 0, 1]])
print(K)

# Get original camera location
cam_pos, cam_quat = env._get_cam_pos(CAM_ID)
cam_rot = T.quat2mat(cam_quat)
cam_pose = np.eye(4)
cam_pose[:3, :3] = cam_rot
cam_pose[:3, 3] = cam_pos
print(cam_pose)

# Set new camera location: rotate the current pose 90 deg about the world z axis through the table
table_pos, table_quat = env._get_body_pos("table")
print("table:", table_pos)
cam_pose_new = T.rotation_matrix(math.pi / 2, np.array([0, 0, 1]), table_pos)
cam_pose_new = np.matmul(cam_pose_new, cam_pose)
cam_quat_new = T.mat2quat(cam_pose_new[:3, :3])
cam_pos_new = cam_pose_new[:3, 3]
env._set_cam_pos(CAM_ID, cam_pos_new, cam_quat_new)
print(cam_pose_new)