def act(self, obs, info=None, goal=None):  # pylint: disable=unused-argument
  """Run inference and return best action given visual observations."""
  tf.keras.backend.set_learning_phase(0)

  # Get heightmap from RGB-D images.
  img = self.get_image(obs)

  # Attention model forward pass.
  pick_conf = self.attention.forward(img)
  argmax = np.argmax(pick_conf)
  argmax = np.unravel_index(argmax, shape=pick_conf.shape)
  p0_pix = argmax[:2]
  p0_theta = argmax[2] * (2 * np.pi / pick_conf.shape[2])

  # Transport model forward pass.
  place_conf = self.transport.forward(img, p0_pix)
  argmax = np.argmax(place_conf)
  argmax = np.unravel_index(argmax, shape=place_conf.shape)
  p1_pix = argmax[:2]
  p1_theta = argmax[2] * (2 * np.pi / place_conf.shape[2])

  # Pixels to end effector poses.
  hmap = img[:, :, 3]
  p0_xyz = utils.pix_to_xyz(p0_pix, hmap, self.bounds, self.pix_size)
  p1_xyz = utils.pix_to_xyz(p1_pix, hmap, self.bounds, self.pix_size)
  p0_xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
  p1_xyzw = utils.eulerXYZ_to_quatXYZW((0, 0, -p1_theta))

  return {
      'pose0': (np.asarray(p0_xyz), np.asarray(p0_xyzw)),
      'pose1': (np.asarray(p1_xyz), np.asarray(p1_xyzw))
  }
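# --- Illustrative sketch (not part of the agent above) ---
# A minimal, self-contained example of how the argmax over a
# (H, W, n_rotations) confidence volume decodes into a pixel location and a
# rotation angle, mirroring the attention/transport decoding in act(). The
# array shape and rotation count below are hypothetical.
import numpy as np

pick_conf = np.random.rand(320, 160, 36)  # H x W x rotation bins (example only).
flat_idx = np.argmax(pick_conf)
row, col, rot_bin = np.unravel_index(flat_idx, pick_conf.shape)
theta = rot_bin * (2 * np.pi / pick_conf.shape[2])  # Bin index -> radians.
print((row, col), theta)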
def get_random_pose(self, env, obj_size):
  """Get random collision-free object pose within workspace bounds."""

  # Get erosion size of object in pixels.
  max_size = np.sqrt(obj_size[0]**2 + obj_size[1]**2)
  erode_size = int(np.round(max_size / self.pix_size))

  _, hmap, obj_mask = self.get_true_image(env)

  # Randomly sample an object pose within free-space pixels.
  free = np.ones(obj_mask.shape, dtype=np.uint8)
  for obj_ids in env.obj_ids.values():
    for obj_id in obj_ids:
      free[obj_mask == obj_id] = 0
  free[0, :], free[:, 0], free[-1, :], free[:, -1] = 0, 0, 0, 0
  free = cv2.erode(free, np.ones((erode_size, erode_size), np.uint8))
  if np.sum(free) == 0:
    return None, None
  pix = utils.sample_distribution(np.float32(free))
  pos = utils.pix_to_xyz(pix, hmap, self.bounds, self.pix_size)
  pos = (pos[0], pos[1], obj_size[2] / 2)
  theta = np.random.rand() * 2 * np.pi
  rot = utils.eulerXYZ_to_quatXYZW((0, 0, theta))
  return pos, rot
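# --- Illustrative sketch (not part of the task above) ---
# A self-contained demo of the free-space sampling idea in get_random_pose():
# mark occupied pixels, zero out the workspace border, erode by the object
# footprint, then sample one remaining free pixel. All sizes and ids here are
# synthetic assumptions.
import numpy as np
import cv2

obj_mask = np.zeros((160, 320), dtype=np.int32)
obj_mask[40:60, 100:140] = 7  # One object with segmentation id 7.
free = np.ones(obj_mask.shape, dtype=np.uint8)
free[obj_mask == 7] = 0
free[0, :], free[:, 0], free[-1, :], free[:, -1] = 0, 0, 0, 0
erode_size = 9  # Hypothetical object diagonal in pixels.
free = cv2.erode(free, np.ones((erode_size, erode_size), np.uint8))
if np.sum(free) > 0:
  candidates = np.argwhere(free)
  pix = candidates[np.random.randint(len(candidates))]
  print('Sampled free pixel:', tuple(pix))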
def act(self, obs, info, compute_error=False, gt_act=None):
  """Run inference and return best action given visual observations."""

  # Get heightmap from RGB-D images.
  colormap, heightmap = self.get_heightmap(obs, self.camera_config)

  # Concatenate color with depth images.
  input_image = np.concatenate(
      (colormap, heightmap[Ellipsis, None], heightmap[Ellipsis, None],
       heightmap[Ellipsis, None]),
      axis=2)

  # Attention model forward pass.
  attention = self.attention_model.forward(input_image)
  argmax = np.argmax(attention)
  argmax = np.unravel_index(argmax, shape=attention.shape)
  p0_pixel = argmax[:2]
  p0_theta = argmax[2] * (2 * np.pi / attention.shape[2])

  # Transport model forward pass.
  transport = self.transport_model.forward(input_image, p0_pixel)
  _, z, roll, pitch = self.rpz_model.forward(input_image, p0_pixel)

  argmax = np.argmax(transport)
  argmax = np.unravel_index(argmax, shape=transport.shape)

  # Index into 3D discrete tensor, grab z, roll, pitch activations.
  z_best = z[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]
  roll_best = roll[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]
  pitch_best = pitch[:, argmax[0], argmax[1], argmax[2]][Ellipsis, None]

  # Send through regressors for each of z, roll, pitch.
  z_best = self.rpz_model.z_regressor(z_best)[0, 0]
  roll_best = self.rpz_model.roll_regressor(roll_best)[0, 0]
  pitch_best = self.rpz_model.pitch_regressor(pitch_best)[0, 0]

  p1_pixel = argmax[:2]
  p1_theta = argmax[2] * (2 * np.pi / transport.shape[2])

  # Pixels to end effector poses.
  p0_position = utils.pix_to_xyz(p0_pixel, heightmap, self.bounds,
                                 self.pixel_size)
  p1_position = utils.pix_to_xyz(p1_pixel, heightmap, self.bounds,
                                 self.pixel_size)
  p1_position = (p1_position[0], p1_position[1], z_best)

  p0_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
  p1_rotation = utils.eulerXYZ_to_quatXYZW(
      (roll_best, pitch_best, -p1_theta))

  if compute_error:
    gt_p0_position, gt_p0_rotation = gt_act['params']['pose0']
    gt_p1_position, gt_p1_rotation = gt_act['params']['pose1']

    gt_p0_pixel = np.array(
        utils.xyz_to_pix(gt_p0_position, self.bounds, self.pixel_size))
    gt_p1_pixel = np.array(
        utils.xyz_to_pix(gt_p1_position, self.bounds, self.pixel_size))

    self.p0_pixel_error(np.linalg.norm(gt_p0_pixel - np.array(p0_pixel)))
    self.p1_pixel_error(np.linalg.norm(gt_p1_pixel - np.array(p1_pixel)))

    gt_p0_theta = -np.float32(
        utils.quatXYZW_to_eulerXYZ(gt_p0_rotation)[2])
    gt_p1_theta = -np.float32(
        utils.quatXYZW_to_eulerXYZ(gt_p1_rotation)[2])
    self.p0_theta_error(
        abs((np.rad2deg(gt_p0_theta - p0_theta) + 180) % 360 - 180))
    self.p1_theta_error(
        abs((np.rad2deg(gt_p1_theta - p1_theta) + 180) % 360 - 180))

    return None

  return {
      'pose0': (np.asarray(p0_position), np.asarray(p0_rotation)),
      'pose1': (np.asarray(p1_position), np.asarray(p1_rotation))
  }
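# --- Illustrative sketch (not part of the agent above) ---
# The compute_error branch wraps angular differences into (-180, 180] degrees
# before taking the magnitude, so 350 deg vs. 10 deg counts as a 20 deg error
# rather than 340 deg. Standalone demo of that formula:
import numpy as np

def angular_error_deg(gt_theta, pred_theta):
  """Absolute angular error in degrees, wrapped to (-180, 180]."""
  return abs((np.rad2deg(gt_theta - pred_theta) + 180) % 360 - 180)

print(angular_error_deg(np.deg2rad(350), np.deg2rad(10)))  # -> 20.0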
def act(obs, info):  # pylint: disable=unused-argument
  """Calculate action."""

  # Oracle uses perfect RGB-D orthographic images and segmentation masks.
  _, hmap, obj_mask = self.get_true_image(env)

  # Unpack next goal step.
  objs, matches, targs, replace, rotations, _, _, _ = self.goals[0]

  # Match objects to targets without replacement.
  if not replace:

    # Modify a copy of the match matrix.
    matches = matches.copy()

    # Ignore already matched objects.
    for i in range(len(objs)):
      object_id, (symmetry, _) = objs[i]
      pose = p.getBasePositionAndOrientation(object_id)
      targets_i = np.argwhere(matches[i, :]).reshape(-1)
      for j in targets_i:
        # Check whether the object has already reached its target.
        if self.is_match(pose, targs[j], symmetry):
          matches[i, :] = 0
          matches[:, j] = 0

  # Get objects to be picked (prioritize farthest from nearest neighbor).
  nn_dists = []
  nn_targets = []
  for i in range(len(objs)):
    object_id, (symmetry, _) = objs[i]
    xyz, _ = p.getBasePositionAndOrientation(object_id)
    targets_i = np.argwhere(matches[i, :]).reshape(-1)
    if len(targets_i) > 0:  # pylint: disable=g-explicit-length-test
      targets_xyz = np.float32([targs[j][0] for j in targets_i])
      dists = np.linalg.norm(
          targets_xyz - np.float32(xyz).reshape(1, 3), axis=1)
      nn = np.argmin(dists)
      nn_dists.append(dists[nn])
      nn_targets.append(targets_i[nn])

    # Handle ignored objects.
    else:
      nn_dists.append(0)
      nn_targets.append(-1)
  order = np.argsort(nn_dists)[::-1]

  # Matched objects may already sit at their target locations; filter them out.
  order = [i for i in order if nn_dists[i] > 0]

  pick_mask = None
  for pick_i in order:
    pick_mask = np.uint8(obj_mask == objs[pick_i][0])

    # Erode to avoid picking on edges.
    # pick_mask = cv2.erode(pick_mask, np.ones((3, 3), np.uint8))

    if np.sum(pick_mask) > 0:
      break

  # Trigger task reset if no object is visible.
  if pick_mask is None or np.sum(pick_mask) == 0:
    self.goals = []
    print('Object for pick is not visible. Skipping demonstration.')
    return

  # Get picking pose.
  pick_prob = np.float32(pick_mask)
  pick_pix = utils.sample_distribution(pick_prob)
  # For "deterministic" demonstrations on insertion-easy, use this:
  # pick_pix = (160, 80)
  pick_pos = utils.pix_to_xyz(pick_pix, hmap, self.bounds, self.pix_size)
  pick_pose = (np.asarray(pick_pos), np.asarray((0, 0, 0, 1)))

  # Get placing pose.
  targ_pose = targs[nn_targets[pick_i]]  # pylint: disable=undefined-loop-variable
  obj_pose = p.getBasePositionAndOrientation(objs[pick_i][0])  # pylint: disable=undefined-loop-variable
  if not self.sixdof:
    obj_euler = utils.quatXYZW_to_eulerXYZ(obj_pose[1])
    obj_quat = utils.eulerXYZ_to_quatXYZW((0, 0, obj_euler[2]))
    obj_pose = (obj_pose[0], obj_quat)
  world_to_pick = utils.invert(pick_pose)
  obj_to_pick = utils.multiply(world_to_pick, obj_pose)
  pick_to_obj = utils.invert(obj_to_pick)
  place_pose = utils.multiply(targ_pose, pick_to_obj)

  # Rotate end effector?
  if not rotations:
    place_pose = (place_pose[0], (0, 0, 0, 1))

  place_pose = (np.asarray(place_pose[0]), np.asarray(place_pose[1]))

  return {'pose0': pick_pose, 'pose1': place_pose}
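# --- Illustrative sketch (not part of the oracle above) ---
# The place pose is derived by frame composition: express the object in the
# gripper (pick) frame, then re-attach that offset to the target pose. The
# utils.invert / utils.multiply helpers are presumably analogous to pybullet's
# invertTransform / multiplyTransforms, which are used directly here with
# made-up poses.
import pybullet as p

pick_pos, pick_orn = (0.5, 0.1, 0.02), (0, 0, 0, 1)        # Hypothetical grasp pose.
obj_pos, obj_orn = (0.52, 0.12, 0.02), (0, 0, 0, 1)        # Hypothetical object pose.
targ_pos, targ_orn = (0.6, -0.2, 0.02), (0, 0, 0.7071, 0.7071)  # Hypothetical target.

world_to_pick = p.invertTransform(pick_pos, pick_orn)
obj_to_pick = p.multiplyTransforms(*world_to_pick, obj_pos, obj_orn)
pick_to_obj = p.invertTransform(*obj_to_pick)
place_pos, place_orn = p.multiplyTransforms(targ_pos, targ_orn, *pick_to_obj)
print(place_pos, place_orn)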
def act(self, obs, info):
  """Run inference and return best action given visual observations."""
  del info

  act = {'camera_config': self.camera_config, 'primitive': None}
  if not obs:
    return act

  # Get heightmap from RGB-D images.
  colormap, heightmap = self.get_heightmap(obs, self.camera_config)

  # Concatenate color with depth images.
  input_image = np.concatenate(
      (colormap, heightmap[Ellipsis, None], heightmap[Ellipsis, None],
       heightmap[Ellipsis, None]),
      axis=2)

  # Get top-k pixels from pick and place heatmaps.
  k = 100
  pick_heatmap = self.pick_model.forward(
      input_image, apply_softmax=True).squeeze()
  place_heatmap = self.place_model.forward(
      input_image, apply_softmax=True).squeeze()
  descriptors = np.float32(self.match_model.forward(input_image))

  # V4
  pick_heatmap = cv2.GaussianBlur(pick_heatmap, (49, 49), 0)
  place_heatmap = cv2.GaussianBlur(place_heatmap, (49, 49), 0)
  pick_topk = np.int32(
      np.unravel_index(
          np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T
  pick_pixel = pick_topk[-1, :]
  from skimage.feature import peak_local_max  # pylint: disable=g-import-not-at-top
  place_peaks = peak_local_max(place_heatmap, num_peaks=1)
  distances = np.ones((place_peaks.shape[0], self.num_rotations)) * 10
  pick_descriptor = descriptors[0, pick_pixel[0],
                                pick_pixel[1], :].reshape(1, -1)
  for i in range(place_peaks.shape[0]):
    peak = place_peaks[i, :]
    place_descriptors = descriptors[:, peak[0], peak[1], :]
    distances[i, :] = np.linalg.norm(
        place_descriptors - pick_descriptor, axis=1)
  ibest = np.unravel_index(np.argmin(distances), shape=distances.shape)
  p0_pixel = pick_pixel
  p0_theta = 0
  p1_pixel = place_peaks[ibest[0], :]
  p1_theta = ibest[1] * (2 * np.pi / self.num_rotations)

  # # V3
  # pick_heatmap = cv2.GaussianBlur(pick_heatmap, (49, 49), 0)
  # place_heatmap = cv2.GaussianBlur(place_heatmap, (49, 49), 0)
  # pick_topk = np.int32(
  #     np.unravel_index(
  #         np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T
  # place_topk = np.int32(
  #     np.unravel_index(
  #         np.argsort(place_heatmap.reshape(-1))[-k:],
  #         place_heatmap.shape)).T
  # pick_pixel = pick_topk[-1, :]
  # place_pixel = place_topk[-1, :]
  # pick_descriptor = descriptors[0, pick_pixel[0],
  #                               pick_pixel[1], :].reshape(1, -1)
  # place_descriptor = descriptors[:, place_pixel[0], place_pixel[1], :]
  # distances = np.linalg.norm(place_descriptor - pick_descriptor, axis=1)
  # irotation = np.argmin(distances)
  # p0_pixel = pick_pixel
  # p0_theta = 0
  # p1_pixel = place_pixel
  # p1_theta = irotation * (2 * np.pi / self.num_rotations)

  # # V2
  # pick_topk = np.int32(
  #     np.unravel_index(
  #         np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T
  # place_topk = np.int32(
  #     np.unravel_index(
  #         np.argsort(place_heatmap.reshape(-1))[-k:],
  #         place_heatmap.shape)).T
  # pick_pixel = pick_topk[-1, :]
  # pick_descriptor = descriptors[0, pick_pixel[0],
  #                               pick_pixel[1], :].reshape(1, 1, 1, -1)
  # distances = np.linalg.norm(descriptors - pick_descriptor, axis=3)
  # distances = np.transpose(distances, [1, 2, 0])
  # max_distance = int(np.round(np.max(distances)))
  # for i in range(self.num_rotations):
  #   distances[:, :, i] = cv2.circle(distances[:, :, i],
  #                                   (pick_pixel[1], pick_pixel[0]), 50,
  #                                   max_distance, -1)
  # ibest = np.unravel_index(np.argmin(distances), shape=distances.shape)
  # p0_pixel = pick_pixel
  # p0_theta = 0
  # p1_pixel = ibest[:2]
  # p1_theta = ibest[2] * (2 * np.pi / self.num_rotations)

  # # V1
  # pick_topk = np.int32(
  #     np.unravel_index(
  #         np.argsort(pick_heatmap.reshape(-1))[-k:], pick_heatmap.shape)).T
  # place_topk = np.int32(
  #     np.unravel_index(
  #         np.argsort(place_heatmap.reshape(-1))[-k:],
  #         place_heatmap.shape)).T
  # distances = np.zeros((k, k, self.num_rotations))
  # for ipick in range(k):
  #   pick_descriptor = descriptors[0, pick_topk[ipick, 0],
  #                                 pick_topk[ipick, 1], :].reshape(1, -1)
  #   for iplace in range(k):
  #     place_descriptors = descriptors[:, place_topk[iplace, 0],
  #                                     place_topk[iplace, 1], :]
  #     distances[ipick, iplace, :] = np.linalg.norm(
  #         place_descriptors - pick_descriptor, axis=1)
  # ibest = np.unravel_index(np.argmin(distances), shape=distances.shape)
  # p0_pixel = pick_topk[ibest[0], :]
  # p0_theta = 0
  # p1_pixel = place_topk[ibest[1], :]
  # p1_theta = ibest[2] * (2 * np.pi / self.num_rotations)

  # Pixels to end effector poses.
  p0_position = utils.pix_to_xyz(p0_pixel, heightmap, self.bounds,
                                 self.pixel_size)
  p1_position = utils.pix_to_xyz(p1_pixel, heightmap, self.bounds,
                                 self.pixel_size)
  p0_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
  p1_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p1_theta))

  act['primitive'] = 'pick_place'
  if self.task == 'sweeping':
    act['primitive'] = 'sweep'
  elif self.task == 'pushing':
    act['primitive'] = 'push'
  params = {
      'pose0': (np.asarray(p0_position), np.asarray(p0_rotation)),
      'pose1': (np.asarray(p1_position), np.asarray(p1_rotation))
  }
  act['params'] = params
  return act
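# --- Illustrative sketch (not part of the agent above) ---
# Standalone demo of the top-k pixel selection used for pick_topk / place_topk
# above: argsort the flattened heatmap, keep the k best indices (ascending),
# and unravel them back into (row, col) coordinates. The heatmap here is
# synthetic.
import numpy as np

k = 5
heatmap = np.random.rand(160, 320)
topk = np.int32(
    np.unravel_index(
        np.argsort(heatmap.reshape(-1))[-k:], heatmap.shape)).T
best_pixel = topk[-1, :]  # Last row corresponds to the global argmax.
print(topk.shape, tuple(best_pixel))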