def get_action(self):
    """Pick up the blocking object, drop it, go neutral, then run the drawer policy."""
    ee, _ = bullet.get_link_state(self.env.robot_id,
                                  self.env.end_effector_index)
    blocking_pos, _ = bullet.get_object_position(self.env.blocking_object)
    lifted = blocking_pos[2] > self.pick_height_thresh_noisy
    dist_to_pick = np.linalg.norm(self.pick_point - ee)
    dist_to_drop = np.linalg.norm(self.drop_point - ee)

    done = False
    neutral_action = [0.]
    angles = [0., 0., 0.]  # this policy never commands orientation itself

    if self.place_attempted:
        if self.neutral_taken:
            # Already returned to neutral: defer entirely to the drawer policy.
            sub_action, sub_info = self.drawer_policy.get_action()
            xyz = sub_action[:3]
            angles = sub_action[3:6]
            gripper = [sub_action[6]]
            neutral_action = [sub_action[7]]
            done = sub_info['done']
        else:
            # One step that only requests the neutral move.
            xyz = [0., 0., 0.]
            gripper = [0.0]
            neutral_action = [0.7]
            self.neutral_taken = True
    elif dist_to_pick > 0.02 and self.env.is_gripper_open:
        # Approach the pick point; suppress z motion until roughly above it.
        xyz = (self.pick_point - ee) * self.xyz_action_scale
        if np.linalg.norm(xyz[:2] / self.xyz_action_scale) > 0.03:
            xyz[2] = 0.0
        gripper = [0.0]
    elif self.env.is_gripper_open:
        # Close enough: keep servoing toward the pick point while grasping.
        xyz = (self.pick_point - ee) * self.xyz_action_scale
        gripper = [-0.7]
    elif not lifted:
        # Grasped but still low: raise back toward the initial ee pose.
        xyz = (self.env.ee_pos_init - ee) * self.xyz_action_scale
        gripper = [0.]
    elif dist_to_drop > 0.02:
        # Carry the object over to the drop point.
        xyz = (self.drop_point - ee) * self.xyz_action_scale
        gripper = [0.]
    else:
        # Above the drop point: release and remember the attempt.
        xyz = [0., 0., 0.]
        gripper = [0.7]
        self.place_attempted = True

    agent_info = dict(place_attempted=self.place_attempted, done=done)
    action = np.concatenate((xyz, angles, gripper, neutral_action))
    return action, agent_info
def get_action(self):
    """Scripted pick of the target object and place into the container (one attempt)."""
    ee, _ = bullet.get_link_state(self.env.robot_id,
                                  self.env.end_effector_index)
    obj_pos, _ = bullet.get_object_position(
        self.env.objects[self.object_to_target])
    lifted = obj_pos[2] > self.pick_height_thresh_noisy
    obj_dist = np.linalg.norm(obj_pos - ee)

    # Hover target: container xy, 0.15 above it, plus small jitter.
    # NOTE(review): np.random.normal(scale=0.01) is a scalar, so the SAME
    # noise sample is added to all three coordinates — confirm intent.
    container = self.env.container_position
    hover = np.append(container[:2], container[2] + 0.15)
    hover = hover + np.random.normal(scale=0.01)
    hover_dist = np.linalg.norm(hover - ee)
    hover_thresh = 0.03

    done = False
    angles = [0., 0., 0.]  # orientation is never commanded here
    if self.place_attempted:
        # Freeze after the single allowed place attempt.
        xyz = [0., 0., 0.]
        gripper = [0.]
    elif obj_dist > self.dist_thresh and self.env.is_gripper_open:
        # Approach the object; suppress z motion until roughly above it.
        xyz = (obj_pos - ee) * self.xyz_action_scale
        if np.linalg.norm(xyz[:2] / self.xyz_action_scale) > 0.03:
            xyz[2] = 0.0
        gripper = [0.0]
    elif self.env.is_gripper_open:
        # Near enough: keep servoing while closing the gripper.
        xyz = (obj_pos - ee) * self.xyz_action_scale
        gripper = [-0.7]
    elif not lifted:
        # Lift back toward the initial end-effector pose.
        xyz = (self.env.ee_pos_init - ee) * self.xyz_action_scale
        gripper = [0.]
    elif hover_dist > hover_thresh:
        # Carry the object over the container.
        xyz = (hover - ee) * self.xyz_action_scale
        gripper = [0.]
    else:
        # Above the container: release and latch the attempt flag.
        xyz = (0., 0., 0.)
        gripper = [0.7]
        self.place_attempted = True

    agent_info = dict(place_attempted=self.place_attempted, done=done)
    return np.concatenate((xyz, angles, gripper)), agent_info
def reset(self):
    """Reset the environment; optionally restore a state with the object pre-grasped."""
    super(Widow250PickPlaceEnv, self).reset()
    pos, quat = bullet.get_link_state(
        self.robot_id, self.end_effector_index)
    # NOTE(review): this adjusted position is never stored or read afterwards
    # — looks like leftover code; kept to preserve behavior exactly.
    pos[2] -= 0.05
    if self.start_object_in_gripper:
        saved_state = osp.join(OBJECT_IN_GRIPPER_PATH,
                               'object_in_gripper_reset.bullet')
        bullet.load_state(saved_state)
        self.is_gripper_open = False
    return self.get_observation()
def get_action(self):
    """Approach the drawer handle, pull the drawer open, then lift away."""
    ee, _ = bullet.get_link_state(self.env.robot_id,
                                  self.env.end_effector_index)
    handle = self.env.get_drawer_handle_pos() + self.handle_offset
    handle_dist = np.linalg.norm(handle - ee)
    handle_xy_dist = np.linalg.norm(handle[:2] - ee[:2])

    done = False
    angles = [0., 0., 0.]
    gripper = [0.0]  # the gripper stays open for the whole maneuver
    if (handle_xy_dist > self.gripper_xy_dist_thresh
            and not self.env.is_drawer_open()):
        # Move over the handle in xy; zero z so the arm does not droop.
        approach = (handle - ee) * 7.0
        xyz = list(approach[:2]) + [0.]
    elif (handle_dist > self.gripper_dist_thresh
            and not self.env.is_drawer_open()):
        # Descend onto the handle.
        xyz = (handle - ee) * 7.0
    elif not self.env.is_drawer_open():
        # Pull; direction depends on which side the drawer opens toward.
        x_command = (-1)**(1 - self.env.left_opening)
        xyz = np.array([x_command, 0, 0])
    elif (np.abs(ee[2] - self.ending_height_thresh)
            > self.gripper_dist_thresh):
        # Drawer is open: retreat upward.
        self.drawer_never_opened = False
        xyz = np.array([0, 0, 0.7])
    else:
        # At ending height with the drawer open: hold still.
        xyz = [0., 0., 0.]

    agent_info = dict(done=done)
    return np.concatenate((xyz, angles, gripper)), agent_info
def get_action(self):
    """Push the top drawer shut, then hand control to the bottom-drawer policy."""
    ee, _ = bullet.get_link_state(self.env.robot_id,
                                  self.env.end_effector_index)
    # NOTE(review): these handle distances are computed but never used below;
    # kept (getter included) to preserve the original behavior exactly.
    handle = self.env.get_drawer_handle_pos() + self.handle_offset
    handle_dist = np.linalg.norm(handle - ee)
    handle_xy_dist = np.linalg.norm(handle[:2] - ee[:2])

    top_drawer = self.env.get_drawer_pos("drawer_top")
    push_target = top_drawer + np.array([0.15, 0, 0.05])
    ready_to_push = (ee[0] > push_target[0]
                     and ee[2] < push_target[2])

    done = False
    neutral_action = [0.0]
    angles = [0., 0., 0.]
    if (not self.env.is_top_drawer_closed()
            and not self.reached_pushing_region
            and not ready_to_push):
        # Swing up and left into the region from which pushing works.
        xyz = [0.3, -0.2, -0.15]
        gripper = [0.0]
    elif not self.env.is_top_drawer_closed():
        # In position: push the top drawer closed.
        self.reached_pushing_region = True
        xyz = (top_drawer + self.top_drawer_offset - ee) * 7.0
        xyz[0] *= 3    # emphasize the pushing direction
        xyz[1] *= 0.6  # damp lateral drift
        gripper = [0.0]
    else:
        # Top drawer closed: run the bottom-drawer opening policy.
        sub_action, sub_info = self.bottom_drawer_policy.get_action()
        xyz = sub_action[:3]
        angles = sub_action[3:6]
        gripper = [sub_action[6]]
        neutral_action = [sub_action[7]]
        done = sub_info['done']

    agent_info = dict(done=done)
    action = np.concatenate((xyz, angles, gripper, neutral_action))
    return action, agent_info
def get_action(self):
    """Move over the button, close the gripper once, press, then lift away."""
    ee, _ = bullet.get_link_state(self.env.robot_id,
                                  self.env.end_effector_index)
    button = self.env.get_button_pos() + self.button_offset
    button_xy_dist = np.linalg.norm(button[:2] - ee[:2])

    done = False
    angles = [0., 0., 0.]
    if (button_xy_dist > self.gripper_xy_dist_thresh
            and not self.env.is_button_pressed()):
        # Approach in xy only; zero z so the arm does not droop.
        approach = (button - ee) * self.xyz_action_scale
        xyz = list(approach[:2]) + [0.]
        gripper = [0.0]
    elif not self.gripper_closed:
        # Close the gripper once before pressing.
        xyz = np.array([0, 0, 0])
        gripper = [-0.7]
        self.gripper_closed = True
    elif not self.env.is_button_pressed():
        # Servo down onto the button until it registers as pressed.
        xyz = (button - ee) * self.xyz_action_scale
        gripper = [0.0]
    elif (np.abs(ee[2] - self.ending_height_thresh)
            > self.gripper_dist_thresh):
        # Button pressed: retreat upward.
        xyz = np.array([0, 0, 0.7])
        gripper = [0.0]
    else:
        # Done: hold still.
        xyz = [0., 0., 0.]
        gripper = [0.0]

    agent_info = dict(done=done)
    return np.concatenate((xyz, angles, gripper)), agent_info
def get_observation(self):
    """Build the pixel observation dict: image, proprioceptive state, object pose."""
    gripper_state = self.get_gripper_state()
    gripper_open_flag = [float(self.is_gripper_open)]
    ee_pos, ee_quat = bullet.get_link_state(
        self.robot_id, self.end_effector_index)
    obj_pos, obj_quat = bullet.get_object_position(
        self.objects[self.target_object])

    # Only pixel observations are implemented.
    if self.observation_mode != 'pixels':
        raise NotImplementedError

    # Flattened image scaled to [0, 1].
    image = np.float32(self.render_obs().flatten()) / 255.0
    proprio = np.concatenate(
        (ee_pos, ee_quat, gripper_state, gripper_open_flag))
    return {
        'object_position': obj_pos,
        'object_orientation': obj_quat,
        'state': proprio,
        'image': image,
    }
def step(self, action):
    """Apply one 8-dim action and advance the simulation.

    Action layout: [0:3] ee position delta, [3:6] ee orientation delta
    (degrees), [6] gripper command, [7] neutral-pose trigger.
    Returns the usual gym tuple (observation, reward, done, info);
    ``done`` is always False here.
    """
    # TODO Clean this up
    # Reject malformed actions early rather than corrupting sim state.
    if np.isnan(np.sum(action)):
        print('action', action)
        raise RuntimeError('Action has NaN entries')
    action = np.clip(action, -1, +1)  # TODO Clean this up
    xyz_action = action[:3]  # ee position actions
    abc_action = action[3:6]  # ee orientation actions
    gripper_action = action[6]
    neutral_action = action[7]
    # Current end-effector pose and gripper joint positions.
    ee_pos, ee_quat = bullet.get_link_state(
        self.robot_id, self.end_effector_index)
    joint_states, _ = bullet.get_joint_states(self.robot_id,
                                              self.movable_joints)
    # The last two movable joints are the gripper fingers.
    gripper_state = np.asarray([joint_states[-2], joint_states[-1]])
    # Scaled deltas produce the IK targets; orientation goes via degrees.
    target_ee_pos = ee_pos + self.xyz_action_scale * xyz_action
    ee_deg = bullet.quat_to_deg(ee_quat)
    target_ee_deg = ee_deg + self.abc_action_scale * abc_action
    target_ee_quat = bullet.deg_to_quat(target_ee_deg)
    if self.control_mode == 'continuous':
        # Gripper fingers move continuously, in opposite directions.
        num_sim_steps = self.num_sim_steps
        target_gripper_state = gripper_state + \
            [-self.gripper_action_scale * gripper_action,
             self.gripper_action_scale * gripper_action]
    elif self.control_mode == 'discrete_gripper':
        # Gripper snaps fully open/closed; extra sim steps let it settle.
        if gripper_action > 0.5 and not self.is_gripper_open:
            num_sim_steps = self.num_sim_steps_discrete_action
            target_gripper_state = GRIPPER_OPEN_STATE
            self.is_gripper_open = True  # TODO(avi): Clean this up
        elif gripper_action < -0.5 and self.is_gripper_open:
            num_sim_steps = self.num_sim_steps_discrete_action
            target_gripper_state = GRIPPER_CLOSED_STATE
            self.is_gripper_open = False  # TODO(avi): Clean this up
        else:
            # No state change: keep commanding the current open/closed pose.
            num_sim_steps = self.num_sim_steps
            if self.is_gripper_open:
                target_gripper_state = GRIPPER_OPEN_STATE
            else:
                target_gripper_state = GRIPPER_CLOSED_STATE
            # target_gripper_state = gripper_state
    else:
        raise NotImplementedError
    # Clamp targets to the workspace and gripper joint limits.
    target_ee_pos = np.clip(target_ee_pos, self.ee_pos_low,
                            self.ee_pos_high)
    target_gripper_state = np.clip(target_gripper_state,
                                   GRIPPER_LIMITS_LOW,
                                   GRIPPER_LIMITS_HIGH)
    bullet.apply_action_ik(
        target_ee_pos, target_ee_quat, target_gripper_state,
        self.robot_id,
        self.end_effector_index, self.movable_joints,
        lower_limit=JOINT_LIMIT_LOWER,
        upper_limit=JOINT_LIMIT_UPPER,
        rest_pose=RESET_JOINT_VALUES,
        joint_range=JOINT_RANGE,
        num_sim_steps=num_sim_steps)
    # Optional neutral move, preserving the current gripper open/closed pose.
    if self.use_neutral_action and neutral_action > 0.5:
        if self.neutral_gripper_open:
            bullet.move_to_neutral(
                self.robot_id,
                self.reset_joint_indices,
                RESET_JOINT_VALUES)
        else:
            bullet.move_to_neutral(
                self.robot_id,
                self.reset_joint_indices,
                RESET_JOINT_VALUES_GRIPPER_CLOSED)
    info = self.get_info()
    reward = self.get_reward(info)
    done = False
    return self.get_observation(), reward, done, info
def get_action(self):
    """Open the drawer, then grasp and lift the target object out of it.

    Phase 1 (drawer): approach the handle in xy, descend onto it, pull the
    drawer open, and lift away for a bounded number of steps
    (``self.t_upward``).  Phase 2 (grasp): once ``self.reached_grasp`` is
    latched, servo to ``self.pick_point``, close the gripper, and raise the
    object above ``self.pick_height_thresh``.

    Fix: removed leftover debug ``print`` statements (sibling policies keep
    the equivalent lines commented out) and normalized the final tuple
    literal to a list for consistency.

    Returns:
        (action, agent_info): 7-dim action (xyz, angles, gripper) and a
        dict carrying the ``done`` flag (always False here).
    """
    ee_pos, _ = bullet.get_link_state(self.env.robot_id,
                                      self.env.end_effector_index)
    handle_pos = self.env.get_drawer_handle_pos() + self.handle_offset
    gripper_handle_dist = np.linalg.norm(handle_pos - ee_pos)
    gripper_handle_xy_dist = np.linalg.norm(handle_pos[:2] - ee_pos[:2])
    object_pos, _ = bullet.get_object_position(
        self.env.objects[self.object_to_target])
    object_lifted = object_pos[2] > self.pick_height_thresh
    gripper_pickpoint_dist = np.linalg.norm(self.pick_point - ee_pos)
    done = False
    if (gripper_handle_xy_dist > self.gripper_xy_dist_thresh
            and not self.env.is_drawer_open()) and not self.reached_grasp:
        # xy - approaching handle; zero z so the arm does not droop down.
        action_xyz = (handle_pos - ee_pos) * 7.0
        action_xyz = list(action_xyz[:2]) + [0.]
        action_angles = [0., 0., 0.]
        action_gripper = [0.0]
    elif (gripper_handle_dist > self.gripper_dist_thresh
            and not self.env.is_drawer_open()) and not self.reached_grasp:
        # Moving down toward the handle.
        action_xyz = (handle_pos - ee_pos) * 7.0
        action_angles = [0., 0., 0.]
        action_gripper = [0.0]
    elif not self.env.is_drawer_open() and not self.reached_grasp:
        # Opening the drawer; pull direction depends on the opening side.
        x_command = (-1)**(1 - self.env.left_opening)
        action_xyz = np.array([x_command, 0, 0])
        action_angles = [0., 0., 0.]
        action_gripper = [0.0]
    elif (np.abs(ee_pos[2] - self.ending_height_thresh)
            > self.gripper_dist_thresh) and self.t_upward >= 0:
        # Drawer open: force an upward retreat for at most t_upward steps.
        self.drawer_never_opened = False
        action_xyz = np.array([0, 0, 0.7])
        action_angles = [0., 0., 0.]
        action_gripper = [0.0]
        self.t_upward -= 1
    else:
        # Latch permanently into the grasping phase.
        self.reached_grasp = True
        if gripper_pickpoint_dist > 0.02 and self.env.is_gripper_open:
            # Move near the object; stay level until roughly above it.
            action_xyz = (self.pick_point - ee_pos) * self.xyz_action_scale
            xy_diff = np.linalg.norm(action_xyz[:2] / self.xyz_action_scale)
            if xy_diff > 0.03:
                action_xyz[2] = 0.0
            action_angles = [0., 0., 0.]
            action_gripper = [0.]
        elif self.env.is_gripper_open:
            # Near the object enough: perform the grasping action.
            action_xyz = (self.pick_point - ee_pos) * self.xyz_action_scale
            action_angles = [0., 0., 0.]
            action_gripper = [-0.7]
        elif not object_lifted:
            # Lift the object above the height threshold for picking.
            action_xyz = ((self.env.ee_pos_init - ee_pos)
                          * self.xyz_action_scale)
            action_angles = [0., 0., 0.]
            action_gripper = [0.]
        else:
            # Hold position.
            action_xyz = [0., 0., 0.]
            action_angles = [0., 0., 0.]
            action_gripper = [0.]
    agent_info = dict(done=done)
    action = np.concatenate((action_xyz, action_angles, action_gripper))
    return action, agent_info