def _get_reward(self, state, action): ''' Compute single step reward. ''' reward_info = dict() reward = 0. if self.pos_reward: pos_err = self.data.qpos[ self.block_pos_idx][:3] - self.block_target_position[:3] reward_info['block_pos_reward'] = -np.linalg.norm(pos_err) * 10 reward += reward_info['block_pos_reward'] if self.rot_reward: rot_err = subQuat(self.data.qpos[self.block_pos_idx][3:], self.block_target_position[3:]) reward_info['block_rot_reward'] = -np.linalg.norm(rot_err) reward += reward_info['block_rot_reward'] if self.pos_vel_reward: raise NotImplementedError # TODO: this should give positive reward when moving towards the goal and negative reward when moving away from the goal pos_vel = self.data.qvel[self.block_vel_idx[:3]] reward_info['block_pos_vel_reward'] = -np.linalg.norm(pos_vel) reward += reward_info['block_pos_vel_reward'] if self.rot_vel_reward: raise NotImplementedError # TODO: this should give positive reward when moving towards the goal and negative reward when moving away from the goal rot_vel = self.data.qvel[self.block_vel_idx[3:]] reward_info['block_rot_vel_reward'] = -np.linalg.norm(rot_vel) reward += reward_info['block_rot_vel_reward'] if self.peg_tip_height_reward or self.peg_tip_orientation_reward: tip_pos, tip_rot = forwardKinSite(self.sim, ['peg_tip']) tip_pos = tip_pos[0] tip_rot = tip_rot[0] if self.peg_tip_height_reward: tip_height_err = tip_pos[2] - self.table_height reward_info['peg_tip_height_reward'] = -tip_height_err**2 reward += reward_info['peg_tip_height_reward'] if self.peg_tip_orientation_reward: tip_quat = mat2Quat(tip_rot) tip_rot_err = subQuat(tip_quat, np.array([0., 1., 0., 0.])) reward_info['peg_tip_orientation_reward'] = -( np.linalg.norm(tip_rot_err) / 10)**2 reward += reward_info['peg_tip_orientation_reward'] if self.contact_reward: contacts = self.sim.data.contact[:self.sim.data.ncon] contact = 0. for c in contacts: geoms = c.geom1, c.geom2 if (self.tip_geom_id in geoms) and (self.block_geom_id in geoms): contact = 1. break reward_info['contact_reward'] = contact * .1 reward += reward_info['contact_reward'] return reward * self.reward_scale, reward_info
def _get_state_obs(self): ''' Compute the observation at the current state. ''' # Return superclass observation. if self.observe_joints: obs = super(PegInsertionEnv, self)._get_state_obs() else: obs = np.zeros(0) # Return superclass observation stacked with the ft observation. if not self.initialized: ft_obs = np.zeros(6) else: # Compute F/T sensor data ft_obs = self.sim.data.sensordata obs = obs / self.obs_scaling if self.use_ft_sensor: obs = np.concatenate([obs, ft_obs]) # End effector position pos, rot = forwardKinSite(self.sim, ['peg_tip', 'hole_base', 'hole_top']) if self.use_rel_pos_err: pos_obs = pos[1] - pos[0] quat_peg_tip = mat2Quat(rot[0]) quat_hole_base = mat2Quat(rot[1]) rot_obs = subQuat(quat_hole_base, quat_peg_tip).copy() hole_top_obs = pos[2] - pos[0] else: # TODO: we probably also want the EE position in the world pos_obs = pos[1].copy() rot_obs = mat2Quat(rot[1]) hole_top_obs = pos[2] # End effector velocity peg_tip_id = self.model.site_name2id('peg_tip') jacp, jacr = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False) peg_tip_lin_vel = jacp.dot(self.sim.data.qvel) peg_tip_rot_vel = jacr.dot(self.sim.data.qvel) # Transform into end effector frame if self.in_peg_frame: pos_obs = rot[0].T.dot(pos_obs) hole_top_obs = rot[0].T.dot(hole_top_obs) peg_tip_lin_vel = rot[0].T.dot(peg_tip_lin_vel) peg_tip_rot_vel = rot[0].T.dot(peg_tip_rot_vel) obs = np.concatenate([ obs, pos_obs, rot_obs, peg_tip_lin_vel, peg_tip_rot_vel, hole_top_obs ]) return obs
def get_torque(self): ''' Update the impedance control setpoint and compute the torque. ''' # Compute the pose difference. pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False) quat = mat2Quat(mat) dx = self.pos_set - pos dr = subQuat(self.quat_set, quat) dframe = np.concatenate((dx,dr)) # Compute generalized forces from a virtual external force. jpos, jrot = forwardKinJacobianSite(self.sim, self.site_name, recompute=False) J = np.vstack((jpos, jrot)) external_force = J.T.dot(self.stiffness*dframe) # virtual force on the end effector # Cancel other dynamics and add virtual damping using inverse dynamics. acc_des = -self.damping*self.sim.data.qvel self.sim.data.qacc[:] = acc_des mujoco_py.functions.mj_inverse(self.model, self.sim.data) id_torque = self.sim.data.qfrc_inverse[:] generalized_force = id_torque + external_force if self.controlled_joints is None: torque = generalized_force else: torque = generalized_force[self.controlled_joints] return torque
def get_torque(self): ''' Update the impedance control setpoint and compute the torque. ''' # Compute the pose difference. pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False) quat = mat2Quat(mat) dx = self.pos_set - pos dr = subQuat(self.quat_set, quat) # Original dframe = np.concatenate((dx,dr)) # Compute generalized forces from a virtual external force. jpos, jrot = forwardKinJacobianSite(self.sim, self.site_name, recompute=False) J = np.vstack((jpos[:,self.sim_qvel_idx], jrot[:,self.sim_qvel_idx])) cartesian_acc_des = self.stiffness*dframe - self.damping*J.dot(self.sim.data.qvel[self.sim_qvel_idx]) impedance_acc_des = J.T.dot(np.linalg.solve(J.dot(J.T) + 1e-6*np.eye(6), cartesian_acc_des)) # Add stiffness and damping in the null space of the the Jacobian projection_matrix = J.T.dot(np.linalg.solve(J.dot(J.T), J)) projection_matrix = np.eye(projection_matrix.shape[0]) - projection_matrix null_space_control = -self.null_space_damping*self.sim.data.qvel[self.sim_qvel_idx] null_space_control += -self.null_space_stiffness*(self.sim.data.qpos[self.sim_qpos_idx] - self.nominal_qpos) impedance_acc_des += projection_matrix.dot(null_space_control) # Compute torques via inverse dynamics. acc_des = np.zeros(self.sim.model.nv) acc_des[self.sim_qvel_idx] = impedance_acc_des self.sim.data.qacc[:] = acc_des mujoco_py.functions.mj_inverse(self.model, self.sim.data) id_torque = self.sim.data.qfrc_inverse[self.sim_actuators_idx].copy() return id_torque
def residuals(q): sim.data.qpos[qpos_idx] = q xpos, xrot = forwardKin(sim, body_pos, identity_quat, body_id) quat = mat2Quat(xrot) q_diff = subQuat(quat, world_quat) res = np.concatenate((xpos-world_pos, q_diff, reg*(q-q_nom))) return res
def _get_reward(self, state, action): ''' Compute single step reward. ''' # compute position and rotation error pos, rot = forwardKinSite(self.sim, ['peg_tip', 'hole_base'], recompute=False) pos_err = pos[0] - pos[1] dist = np.sqrt(pos_err.dot(pos_err)) peg_quat = mat2Quat(rot[0]) hole_quat = mat2Quat(rot[1]) rot_err = subQuat(peg_quat, hole_quat) pose_err = self.sim.data.qpos - self.good_states[0] peg_tip_id = self.model.site_name2id('peg_tip') jacp, jacv = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False) peg_tip_vel = jacp.dot(self.data.qvel[:]) # print("reward_dist: {}".format(dist)) # quadratic cost on the error and action # rotate the cost terms to align with the hole Q_pos = rotate_cost_by_matrix(self.Q_pos, rot[1].T) # Q_vel = rotate_cost_by_matrix(self.Q_vel,rot[1].T) Q_rot = self.Q_rot reward_info = dict() reward = 0. # reward_info['quaternion_reward'] = -rot_err.dot(Q_rot).dot(rot_err) if self.quadratic_rot_cost: reward_info['quadratic_orientation_reward'] = -rot_err.dot(Q_rot).dot(rot_err) reward += reward_info['quadratic_orientation_reward'] if self.quadratic_cost: reward_info['quadratic_position_reward'] = -pos_err.dot(Q_pos).dot(pos_err) reward += reward_info['quadratic_position_reward'] if self.linear_cost: reward_info['linear_position_reward'] = -np.sqrt(pos_err.dot(Q_pos).dot(pos_err)) reward += reward_info['linear_position_reward'] if self.logarithmic_cost: rew_scale = 2 eps = 10.0**(-rew_scale) zero_crossing = 0.05 reward_info['logarithmic_position_reward'] = -np.log10(np.sqrt(pos_err.dot(Q_pos).dot(pos_err))/zero_crossing*(1-eps) + eps) reward += reward_info['logarithmic_position_reward'] if self.sparse_cost: reward_info['sparse_position_reward'] = 10.0 if np.sqrt(pos_err.dot(pos_err)) < 1e-2 else 0 reward += reward_info['sparse_position_reward'] if self.regularize_pose: reward_info['pose_regularizer_reward'] = -pose_err.dot(self.Q_pose_reg).dot(pose_err) reward += reward_info['pose_regularizer_reward'] # reward_info['velocity_reward'] = -np.sqrt(peg_tip_vel.dot(Q_vel).dot(peg_tip_vel)) # reward += reward_info['velocity_reward'] return reward, reward_info
def test_forwardKinRotJacobian(): # Build the model path. model_filename = 'full_kuka_no_collision.xml' model_path = os.path.join('..', '..', 'envs', 'assets', model_filename) # Construct the model and simulation objects. model = mujoco_py.load_model_from_path(model_path) sim = mujoco_py.MjSim(model) # Set an arbitrary state. q_nom = np.array([.1, .2, .3, .4, .5, .6, .7]) sim.data.qpos[:] = q_nom # Choose an arbitrary point and a link with a dense jacobian. body_id = model.body_name2id('kuka_link_7') pos = np.array([.1, .2, .3]) quat = np.array([1., 0., 0., 0.]) # Compute the forward kinematics and kinematic jacobian. _, xrot = forwardKin(sim, pos, quat, body_id) xquat = mat2Quat(xrot) _, jacr = forwardKinJacobian(sim, pos, body_id) # Compute the kinematic jacobian with finite differences. jac_approx = np.zeros((3, 7)) for i in range(7): dq = np.zeros(model.nq) dq[i] = 1e-6 sim.data.qpos[:] = q_nom + dq _, xrot_ = forwardKin(sim, pos, quat, body_id) xquat_ = mat2Quat(xrot_) jac_approx[:, i] = subQuat(xquat_, xquat) / 1e-6 assert np.allclose(jac_approx - jacr, np.zeros_like(jacr), atol=1e-6)
def _get_reward(self, state, action): ''' Compute single step reward. ''' # Compute peg tip velocity. # peg_tip_id = self.model.site_name2id('peg_tip') # jacp, jacv = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False) # peg_tip_vel = jacp.dot(self.data.qvel[:]) reward_info = dict() reward = 0. if self.use_qpos_cost: qpos_err = self.sim.data.qpos - self.target_qpos reward_info['qpos_reward'] = -qpos_err.dot(self.Q_qpos).dot(qpos_err) reward += reward_info['qpos_reward'] if self.use_qvel_cost: reward_info['qvel_reward'] = -self.data.qvel.dot(self.Q_qvel).dot(self.data.qvel) reward += reward_info['qvel_reward'] if self.use_pos_cost or self.use_rot_cost: # compute position and rotation error pos, rot = forwardKinSite(self.sim, ['peg_tip'], recompute=False) peg_pos = pos[0] peg_quat = mat2Quat(rot[0]) if self.use_pos_cost: pos_err = peg_pos - self.target_pos reward_info['pos_reward'] = -pos_err.dot(self.Q_pos).dot(pos_err) reward += reward_info['pos_reward'] if self.use_rot_cost: rot_err = subQuat(peg_quat, self.target_quat) reward_info['rot_reward'] = -rot_err.dot(self.Q_rot).dot(rot_err) reward += reward_info['rot_reward'] return reward, reward_info
def _get_info(self): info = dict() pos_err = self.data.qpos[ self.block_pos_idx][:3] - self.block_target_position[:3] info['block_pos_dist'] = np.linalg.norm(pos_err) rot_err = subQuat(self.data.qpos[self.block_pos_idx][3:], self.block_target_position[3:]) info['block_rot_dist'] = np.linalg.norm(rot_err) return info
def _get_target_obs(self): # Compute relative position error raise NotImplementedError if self.use_rel_pos_err: pos, rot = forwardKinSite(self.sim, ['hammer_tip', 'nail_top']) pos_obs = pos[0] - pos[1] quat_hammer_tip = mat2Quat(rot[0]) quat_nail_top = mat2Quat(rot[1]) rot_obs = subQuat(quat_hammer_tip, quat_nail_top) return np.concatenate([pos_obs, rot_obs]) else: return np.array([self.data.qpos[self.nail_idx]])
def compute_distance_travelled(x_pos, x_rot): pos_dist = 0 rot_dist = 0 for i in range(len(x_pos) - 1): dx = x_pos[i + 1] - x_pos[i] pos_dist += np.linalg.norm(dx) x_quat = mat2Quat(x_rot[i]) x_quat_next = mat2Quat(x_rot[i + 1]) dq = subQuat(x_quat, x_quat_next) rot_dist += np.linalg.norm(dq) return pos_dist, rot_dist
def _get_info(self): # Build the info dict. info = dict() # Joint space distances. qpos_err = self.data.qpos - self.target_qpos info['qpos_dist'] = np.linalg.norm(qpos_err) info['qvel_dist'] = np.linalg.norm(self.data.qvel) # Task space distances. pos, rot = forwardKinSite(self.sim, ['peg_tip'], recompute=False) pos_err = pos[0] - self.target_pos info['pos_dist'] = np.linalg.norm(pos_err) rot_err = subQuat(mat2Quat(rot[0]), self.target_quat) info['rot_dist'] = np.linalg.norm(rot_err) # Success metric. info['success'] = float(info['pos_dist'] < 1e-2) return info
def _get_state_obs(self): ''' Compute the observation at the current state. ''' # Return superclass observation. if self.use_joint_observation: obs = super(HoldPositionEnv, self)._get_state_obs() else: obs = np.zeros(0) # Compute the relative pose information. if self.use_rel_pose_observation: if self.initialized: pos, rot = forwardKinSite(self.sim, ['peg_tip'], recompute=False) pos_err = pos[0] - self.target_pos rot_err = subQuat(mat2Quat(rot[0]), self.target_quat) else: pos_err = np.zeros(3) rot_err = np.zeros(3) obs = np.concatenate([obs, pos_err, rot_err]) return obs