def _get_obs(self): # positions grip_pos = self.sim.data.get_site_xpos('robot0:grip') dt = self.sim.nsubsteps * self.sim.model.opt.timestep grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt robot_qpos, robot_qvel = utils.robot_get_obs(self.sim) if self.has_object: object_pos = self.sim.data.get_site_xpos('object0') # rotations object_rot = rotations.mat2euler( self.sim.data.get_site_xmat('object0')) # velocities object_velp = self.sim.data.get_site_xvelp('object0') * dt object_velr = self.sim.data.get_site_xvelr('object0') * dt # gripper state object_rel_pos = object_pos - grip_pos object_velp -= grip_velp else: object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros( 0) gripper_state = robot_qpos[-2:] gripper_vel = robot_qvel[ -2:] * dt # change to a scalar if the gripper is made symmetric if not self.has_object: achieved_goal = grip_pos.copy() else: achieved_goal = np.squeeze(object_pos.copy()) obs = np.concatenate([ grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel, ]) return { 'observation': obs.copy(), 'achieved_goal': achieved_goal.copy(), 'desired_goal': self.goal.copy(), }
def _get_obs(self): obj_grp = self.max_n_objects if self.ai_object else 0 grip_pos = self.sim.data.get_site_xpos('robot0:grip') dt = self.sim.nsubsteps * self.sim.model.opt.timestep grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt robot_qpos, robot_qvel = utils.robot_get_obs(self.sim) gripper_state = robot_qpos[-2:] gripper_vel = robot_qvel[ -2:] * dt # change to a scalar if the gripper is made symmetric obs_all = [] object_pos, object_rot, object_velp, object_velr, object_rel_pos = [], [], [], [], [] for i_object in range(obj_grp, obj_grp + self.n_objects): # observations for the robot object_pos.append( self.sim.data.get_site_xpos('object' + str(i_object))) # rotations object_rot.append( rotations.mat2euler( self.sim.data.get_site_xmat('object' + str(i_object)))) # velocities object_velp.append( self.sim.data.get_site_xvelp('object' + str(i_object)) * dt - grip_velp) object_velr.append( self.sim.data.get_site_xvelr('object' + str(i_object)) * dt) # gripper state object_rel_pos.append( self.sim.data.get_site_xpos('object' + str(i_object)) - grip_pos) object_pos = np.asarray(object_pos) object_rot = np.asarray(object_rot) object_velp = np.asarray(object_velp) object_velr = np.asarray(object_velr) object_rel_pos = np.asarray(object_rel_pos) obs = np.concatenate([ grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel ]) if self.observe_obj_grp: obs = np.concatenate([obs, np.asarray([obj_grp])]) obs_all.append(obs.copy()) target_pos = self.goal.copy().reshape(self.n_objects, -1) target_rel_pos, target_velp = [], [] for i_object in range(obj_grp, obj_grp + self.n_objects): # observations for the object # object state wrt target target_rel_pos.append(target_pos[i_object % self.max_n_objects] - self.sim.data.get_site_xpos('object' + str(i_object))) target_velp.append( 0 - self.sim.data.get_site_xvelp('object' + str(i_object)) * dt) target_rel_pos = np.asarray(target_rel_pos) target_velp = np.asarray(target_velp) obs = np.concatenate([ np.zeros_like(grip_pos), object_pos.ravel(), target_rel_pos.ravel(), np.zeros_like(gripper_state), object_rot.ravel(), target_velp.ravel(), object_velr.ravel(), np.zeros_like(grip_velp), np.zeros_like(gripper_vel) ]) if self.observe_obj_grp: obs = np.concatenate([obs, np.asarray([self.max_n_objects])]) obs_all.append(obs.copy()) obs = np.asarray(obs_all) achieved_goal = object_pos.copy().ravel() return { 'observation': obs.copy(), 'achieved_goal': achieved_goal.copy(), 'desired_goal': self.goal.copy(), }