def _get_state_obs(self): ''' Compute the observation at the current state. ''' # Return superclass observation. if self.observe_joints: obs = super(PegInsertionEnv, self)._get_state_obs() else: obs = np.zeros(0) # Return superclass observation stacked with the ft observation. if not self.initialized: ft_obs = np.zeros(6) else: # Compute F/T sensor data ft_obs = self.sim.data.sensordata obs = obs / self.obs_scaling if self.use_ft_sensor: obs = np.concatenate([obs, ft_obs]) # End effector position pos, rot = forwardKinSite(self.sim, ['peg_tip', 'hole_base', 'hole_top']) if self.use_rel_pos_err: pos_obs = pos[1] - pos[0] quat_peg_tip = mat2Quat(rot[0]) quat_hole_base = mat2Quat(rot[1]) rot_obs = subQuat(quat_hole_base, quat_peg_tip).copy() hole_top_obs = pos[2] - pos[0] else: # TODO: we probably also want the EE position in the world pos_obs = pos[1].copy() rot_obs = mat2Quat(rot[1]) hole_top_obs = pos[2] # End effector velocity peg_tip_id = self.model.site_name2id('peg_tip') jacp, jacr = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False) peg_tip_lin_vel = jacp.dot(self.sim.data.qvel) peg_tip_rot_vel = jacr.dot(self.sim.data.qvel) # Transform into end effector frame if self.in_peg_frame: pos_obs = rot[0].T.dot(pos_obs) hole_top_obs = rot[0].T.dot(hole_top_obs) peg_tip_lin_vel = rot[0].T.dot(peg_tip_lin_vel) peg_tip_rot_vel = rot[0].T.dot(peg_tip_rot_vel) obs = np.concatenate([ obs, pos_obs, rot_obs, peg_tip_lin_vel, peg_tip_rot_vel, hole_top_obs ]) return obs
def get_torque(self): ''' Update the impedance control setpoint and compute the torque. ''' # Compute the pose difference. pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False) quat = mat2Quat(mat) dx = self.pos_set - pos dr = subQuat(self.quat_set, quat) dframe = np.concatenate((dx,dr)) # Compute generalized forces from a virtual external force. jpos, jrot = forwardKinJacobianSite(self.sim, self.site_name, recompute=False) J = np.vstack((jpos, jrot)) external_force = J.T.dot(self.stiffness*dframe) # virtual force on the end effector # Cancel other dynamics and add virtual damping using inverse dynamics. acc_des = -self.damping*self.sim.data.qvel self.sim.data.qacc[:] = acc_des mujoco_py.functions.mj_inverse(self.model, self.sim.data) id_torque = self.sim.data.qfrc_inverse[:] generalized_force = id_torque + external_force if self.controlled_joints is None: torque = generalized_force else: torque = generalized_force[self.controlled_joints] return torque
def _get_reward(self, state, action): ''' Compute single step reward. ''' # compute position and rotation error pos, rot = forwardKinSite(self.sim, ['peg_tip', 'hole_base'], recompute=False) pos_err = pos[0] - pos[1] dist = np.sqrt(pos_err.dot(pos_err)) peg_quat = mat2Quat(rot[0]) hole_quat = mat2Quat(rot[1]) rot_err = subQuat(peg_quat, hole_quat) pose_err = self.sim.data.qpos - self.good_states[0] peg_tip_id = self.model.site_name2id('peg_tip') jacp, jacv = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False) peg_tip_vel = jacp.dot(self.data.qvel[:]) # print("reward_dist: {}".format(dist)) # quadratic cost on the error and action # rotate the cost terms to align with the hole Q_pos = rotate_cost_by_matrix(self.Q_pos, rot[1].T) # Q_vel = rotate_cost_by_matrix(self.Q_vel,rot[1].T) Q_rot = self.Q_rot reward_info = dict() reward = 0. # reward_info['quaternion_reward'] = -rot_err.dot(Q_rot).dot(rot_err) if self.quadratic_rot_cost: reward_info['quadratic_orientation_reward'] = -rot_err.dot(Q_rot).dot(rot_err) reward += reward_info['quadratic_orientation_reward'] if self.quadratic_cost: reward_info['quadratic_position_reward'] = -pos_err.dot(Q_pos).dot(pos_err) reward += reward_info['quadratic_position_reward'] if self.linear_cost: reward_info['linear_position_reward'] = -np.sqrt(pos_err.dot(Q_pos).dot(pos_err)) reward += reward_info['linear_position_reward'] if self.logarithmic_cost: rew_scale = 2 eps = 10.0**(-rew_scale) zero_crossing = 0.05 reward_info['logarithmic_position_reward'] = -np.log10(np.sqrt(pos_err.dot(Q_pos).dot(pos_err))/zero_crossing*(1-eps) + eps) reward += reward_info['logarithmic_position_reward'] if self.sparse_cost: reward_info['sparse_position_reward'] = 10.0 if np.sqrt(pos_err.dot(pos_err)) < 1e-2 else 0 reward += reward_info['sparse_position_reward'] if self.regularize_pose: reward_info['pose_regularizer_reward'] = -pose_err.dot(self.Q_pose_reg).dot(pose_err) reward += reward_info['pose_regularizer_reward'] # reward_info['velocity_reward'] = -np.sqrt(peg_tip_vel.dot(Q_vel).dot(peg_tip_vel)) # reward += reward_info['velocity_reward'] return reward, reward_info
def get_torque(self): ''' Update the impedance control setpoint and compute the torque. ''' # Compute the pose difference. pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False) quat = mat2Quat(mat) dx = self.pos_set - pos dr = subQuat(self.quat_set, quat) # Original dframe = np.concatenate((dx,dr)) # Compute generalized forces from a virtual external force. jpos, jrot = forwardKinJacobianSite(self.sim, self.site_name, recompute=False) J = np.vstack((jpos[:,self.sim_qvel_idx], jrot[:,self.sim_qvel_idx])) cartesian_acc_des = self.stiffness*dframe - self.damping*J.dot(self.sim.data.qvel[self.sim_qvel_idx]) impedance_acc_des = J.T.dot(np.linalg.solve(J.dot(J.T) + 1e-6*np.eye(6), cartesian_acc_des)) # Add stiffness and damping in the null space of the the Jacobian projection_matrix = J.T.dot(np.linalg.solve(J.dot(J.T), J)) projection_matrix = np.eye(projection_matrix.shape[0]) - projection_matrix null_space_control = -self.null_space_damping*self.sim.data.qvel[self.sim_qvel_idx] null_space_control += -self.null_space_stiffness*(self.sim.data.qpos[self.sim_qpos_idx] - self.nominal_qpos) impedance_acc_des += projection_matrix.dot(null_space_control) # Compute torques via inverse dynamics. acc_des = np.zeros(self.sim.model.nv) acc_des[self.sim_qvel_idx] = impedance_acc_des self.sim.data.qacc[:] = acc_des mujoco_py.functions.mj_inverse(self.model, self.sim.data) id_torque = self.sim.data.qfrc_inverse[self.sim_actuators_idx].copy() return id_torque
def _get_state_obs(self): ''' Compute the observation at the current state. ''' if not self.initialized: # 14 positions and 13 velocities obs = np.zeros(27) obs[10] = 1. else: # Return superclass observation. obs = super(PushingEnv, self)._get_state_obs() if not self.initialized: ee_lin_vel_obs = np.zeros(3) ee_rot_vel_obs = np.zeros(3) else: peg_tip_id = self.model.site_name2id('peg_tip') jacp, jacr = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False) ee_lin_vel_obs = jacp.dot(self.sim.data.qvel) ee_rot_vel_obs = jacr.dot(self.sim.data.qvel) obs = np.concatenate([ee_lin_vel_obs, ee_rot_vel_obs]) # Return superclass observation stacked with the ft observation. if not self.initialized: ft_obs = np.zeros(6) else: # Compute F/T sensor data ft_obs = self.sim.data.sensordata obs = obs / self.obs_scaling if self.use_ft_sensor: obs = np.concatenate([obs, ft_obs]) return obs