def get_sample(self, dataset, augment=True):
    """Draw one sample from `dataset` and turn it into training labels.

    Args:
      dataset: object whose `sample()` returns a pair; the first element
        unpacks as `(obs, act, _, _)` and the second is ignored here.
      augment: if True, the image and both pixels go through
        `utils.perturb`.

    Returns:
      `(img, p0, p0_theta, p1, p1_theta)`. `p0_theta` is always 0 and
      `p1_theta` is the place yaw expressed relative to the pick yaw.
    """
    step, _ = dataset.sample()
    obs, act, _, _ = step
    img = self.get_image(obs)

    # Convert the demonstrated pick/place poses into pixel + angle labels.
    pick_xyz, pick_quat = act['pose0']
    place_xyz, place_quat = act['pose1']
    p0 = utils.xyz_to_pix(pick_xyz, self.bounds, self.pix_size)
    pick_yaw = -np.float32(utils.quatXYZW_to_eulerXYZ(pick_quat)[2])
    p1 = utils.xyz_to_pix(place_xyz, self.bounds, self.pix_size)
    place_yaw = -np.float32(utils.quatXYZW_to_eulerXYZ(place_quat)[2])

    # The pick angle is folded into the place angle, then zeroed.
    p1_theta = place_yaw - pick_yaw
    p0_theta = 0

    if not augment:
        return img, p0, p0_theta, p1, p1_theta

    # Keep only the image and the third return item (the pixel pair).
    img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])
    return img, p0, p0_theta, p1, p1_theta
def train(self, dataset, num_iter, writer, validation_dataset=None):
    """Run `num_iter` training steps on samples drawn from `dataset`.

    Args:
      dataset: object exposing `random_sample()` returning `(obs, act, _)`.
      num_iter: number of training iterations to run.
      writer: summary writer; scalars are logged inside
        `writer.as_default()`.
      validation_dataset: accepted for interface compatibility and discarded.

    After the loop, `self.total_iter` is advanced by `num_iter` and
    `self.save()` is called.
    """
    del validation_dataset

    for i in range(num_iter):
        obs, act, _ = dataset.random_sample()

        # Heightmap from the RGB-D observation.
        colormap, heightmap = self.get_heightmap(obs, act['camera_config'])

        # Pixel and angle labels from the demonstrated poses.
        params = act['params']
        pick_pose, place_pose = params['pose0'], params['pose1']
        p0 = utils.xyz_to_pix(pick_pose[0], self.bounds, self.pixel_size)
        pick_yaw = -np.float32(utils.quatXYZW_to_eulerXYZ(pick_pose[1])[2])
        p1 = utils.xyz_to_pix(place_pose[0], self.bounds, self.pixel_size)
        place_yaw = -np.float32(utils.quatXYZW_to_eulerXYZ(place_pose[1])[2])
        # Place angle is expressed relative to the pick angle.
        p1_theta = place_yaw - pick_yaw

        # Colour channels followed by three copies of the height channel.
        height = heightmap[Ellipsis, None]
        input_image = np.concatenate(
            (colormap, height, height, height), axis=2)

        # Augment; the third return item holds the updated pixel pair.
        input_image, _, roundedpixels, _ = utils.perturb(
            input_image, [p0, p1])
        p0, p1 = roundedpixels

        # One training step per model.
        loss0 = self.pick_model.train(input_image, p0, theta=0)
        loss1 = self.place_model.train(input_image, p1, theta=0)
        loss2 = self.match_model.train(input_image, p0, p1, theta=p1_theta)

        tagged_models = (
            ('pick_loss', self.pick_model),
            ('place_loss', self.place_model),
            ('match_loss', self.match_model),
        )
        with writer.as_default():
            for tag, model in tagged_models:
                tf.summary.scalar(
                    tag, model.metric.result(), step=self.total_iter + i)
        print(
            f'Train Iter: {self.total_iter + i} Loss: {loss0:.4f} {loss1:.4f} {loss2:.4f}'
        )

    self.total_iter += num_iter
    self.save()
def get_sample(self, dataset, augment=True):
    """Draw one sample plus its goal observation and build training labels.

    Args:
      dataset: object whose `sample()` returns a pair of 4-tuples; the
        first yields `(obs, act, _, _)` and the second yields the goal
        observation as its first element.
      augment: currently has no effect, see the review note in the body.

    Returns:
      `(img, p0, p0_theta, p1, p1_theta, gimg)`. `p0_theta` is always 0,
      `p1_theta` is the place yaw relative to the pick yaw, and `gimg` is
      the goal observation passed through `self.get_image`.
    """
    current, goal = dataset.sample()
    obs, act, _, _ = current
    gobs, _, _, _ = goal

    img = self.get_image(obs)
    gimg = self.get_image(gobs)

    # Pixel and angle labels from the demonstrated poses.
    pick_xyz, pick_quat = act['pose0']
    place_xyz, place_quat = act['pose1']
    p0 = utils.xyz_to_pix(pick_xyz, self.bounds, self.pix_size)
    pick_yaw = -np.float32(utils.quatXYZW_to_eulerXYZ(pick_quat)[2])
    p1 = utils.xyz_to_pix(place_xyz, self.bounds, self.pix_size)
    place_yaw = -np.float32(utils.quatXYZW_to_eulerXYZ(place_quat)[2])
    p1_theta = place_yaw - pick_yaw
    p0_theta = 0

    # NOTE(review): the caller's `augment` value is overridden here, so the
    # branch below never runs. If it is re-enabled, the two `perturb` calls
    # are separate invocations and the second one receives the pixels
    # already updated by the first -- confirm that is intended.
    augment = False
    print('no augment')
    if augment:
        img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])
        gimg, _, _, _ = utils.perturb(gimg, [p0, p1])

    return img, p0, p0_theta, p1, p1_theta, gimg
def extract_x_y_theta(self,
                      object_info,
                      t_worldaug_world=None,
                      preserve_theta=False):
    """Extract the in-plane (x, y, theta) of an object pose.

    Args:
      object_info: sequence whose first item is the object position and
        whose second item is its orientation quaternion in xyzw order.
      t_worldaug_world: optional 4x4 homogeneous transform applied to the
        object pose before extraction; None leaves the pose untouched.
      preserve_theta: if True and a transform is given, only the position
        is transformed and the original quaternion is kept.

    Returns:
      A 3-tuple `(xytheta, object_position, object_quat_xyzw)`. `xytheta`
      is a float32 array of x, y and the negated yaw divided by
      `self.theta_scale`.
    """
    object_position = object_info[0]
    object_quat_xyzw = object_info[1]

    if t_worldaug_world is not None:
        object_quat_wxyz = (object_quat_xyzw[3], object_quat_xyzw[0],
                            object_quat_xyzw[1], object_quat_xyzw[2])

        # Build the 4x4 pose explicitly. Writing the translation into
        # column 3 of the quat2mat result fails when that result is a bare
        # 3x3 rotation (assumed transforms3d-style API -- confirm). Slicing
        # the rotation block also tolerates a 4x4 result.
        rotation = np.asarray(quaternions.quat2mat(object_quat_wxyz))
        t_world_object = np.eye(4)
        t_world_object[0:3, 0:3] = rotation[0:3, 0:3]
        t_world_object[0:3, 3] = np.asarray(object_position)

        t_worldaug_object = t_worldaug_world @ t_world_object

        if not preserve_theta:
            # Pass only the rotation block back to mat2quat.
            object_quat_wxyz = quaternions.mat2quat(
                t_worldaug_object[0:3, 0:3])
            object_quat_xyzw = (object_quat_wxyz[1], object_quat_wxyz[2],
                                object_quat_wxyz[3], object_quat_wxyz[0])
        object_position = t_worldaug_object[0:3, 3]

    object_xy = object_position[0:2]
    object_theta = -np.float32(
        utils.quatXYZW_to_eulerXYZ(object_quat_xyzw)[2]) / self.theta_scale
    xytheta = np.hstack((object_xy, object_theta)).astype(np.float32)
    return xytheta, object_position, object_quat_xyzw
def get_six_dof_object(self, object_info, t_worldaug_world=None):
    """Calculate the 6-DoF pose of an object.

    Args:
      object_info: sequence whose first item is the object position and
        whose second item is its orientation quaternion in xyzw order.
      t_worldaug_world: optional 4x4 homogeneous transform applied to the
        object pose first; None leaves the pose untouched.

    Returns:
      Array `[x, y, z, roll, pitch, theta]`, where roll and pitch are the
      first two Euler components from `utils.quatXYZW_to_eulerXYZ` and
      theta is the negated third component.
    """
    object_position = object_info[0]
    object_quat_xyzw = object_info[1]

    if t_worldaug_world is not None:
        object_quat_wxyz = (object_quat_xyzw[3], object_quat_xyzw[0],
                            object_quat_xyzw[1], object_quat_xyzw[2])

        # Build the 4x4 pose explicitly. Writing the translation into
        # column 3 of the quat2mat result fails when that result is a bare
        # 3x3 rotation (assumed transforms3d-style API -- confirm). Slicing
        # the rotation block also tolerates a 4x4 result.
        rotation = np.asarray(quaternions.quat2mat(object_quat_wxyz))
        t_world_object = np.eye(4)
        t_world_object[0:3, 0:3] = rotation[0:3, 0:3]
        t_world_object[0:3, 3] = np.asarray(object_position)

        t_worldaug_object = t_worldaug_world @ t_world_object

        # Pass only the rotation block back to mat2quat.
        object_quat_wxyz = quaternions.mat2quat(t_worldaug_object[0:3, 0:3])
        object_quat_xyzw = (object_quat_wxyz[1], object_quat_wxyz[2],
                            object_quat_wxyz[3], object_quat_wxyz[0])
        object_position = t_worldaug_object[0:3, 3]

    euler = utils.quatXYZW_to_eulerXYZ(object_quat_xyzw)
    roll = euler[0]
    pitch = euler[1]
    theta = -euler[2]

    return np.asarray([
        object_position[0], object_position[1], object_position[2],
        roll, pitch, theta
    ])
def extract_x_y_theta(self,
                      object_info,
                      t_worldaug_world=None,
                      preserve_theta=False):
    """Extract the in-plane (x, y, theta) of an object pose.

    Args:
      object_info: sequence whose first item is the object position (three
        values) and whose second item is its orientation quaternion in
        xyzw order.
      t_worldaug_world: optional 4x4 homogeneous transform applied to the
        object pose before extraction; None leaves the pose untouched.
      preserve_theta: if True and a transform is given, only the position
        is transformed and the original quaternion is kept.

    Returns:
      A 3-tuple `(xytheta, object_position, object_quat_xyzw)`. `xytheta`
      is a float32 array of x, y and the negated yaw divided by
      `self.theta_scale`.
    """
    object_position = object_info[0]
    object_quat_xyzw = object_info[1]

    if t_worldaug_world is not None:
        object_quat_wxyz = (object_quat_xyzw[3], object_quat_xyzw[0],
                            object_quat_xyzw[1], object_quat_xyzw[2])

        # Assemble the 4x4 pose in one step: rotation block from quat2mat,
        # translation in the last column, identity bottom row.
        rotation = np.asarray(quaternions.quat2mat(object_quat_wxyz))
        t_world_object = np.eye(4)
        t_world_object[0:3, 0:3] = rotation[0:3, 0:3]
        t_world_object[0:3, 3] = np.reshape(object_position, (3,))

        # One product serves both the rotation and the translation.
        t_worldaug_object = t_worldaug_world @ t_world_object

        if not preserve_theta:
            # mat2quat receives only the 3x3 rotation block.
            object_quat_wxyz = quaternions.mat2quat(
                t_worldaug_object[0:3, 0:3])
            object_quat_xyzw = (object_quat_wxyz[1], object_quat_wxyz[2],
                                object_quat_wxyz[3], object_quat_wxyz[0])
        object_position = t_worldaug_object[0:3, 3]

    object_xy = object_position[0:2]
    object_theta = -np.float32(
        utils.quatXYZW_to_eulerXYZ(object_quat_xyzw)[2]) / self.theta_scale
    xytheta = np.hstack((object_xy, object_theta)).astype(np.float32)
    return xytheta, object_position, object_quat_xyzw
def is_match(self, pose0, pose1, symmetry):
    """Tell whether two poses agree within the configured tolerances.

    Args:
      pose0: `(position, quaternion_xyzw)` pair.
      pose1: `(position, quaternion_xyzw)` pair.
      symmetry: rotational symmetry period about z; when not positive the
        rotation check is skipped (rotation error is taken as 0).

    Returns:
      Truthy when the planar distance is below `self.pos_eps` and the
      rotation error is below `self.rot_eps`.
    """
    # Translation error: only the first two position components count.
    xy0 = np.float32(pose0[0][:2])
    xy1 = np.float32(pose1[0][:2])
    dist_pos = np.linalg.norm(xy0 - xy1)

    # Rotation error about z, folded into [0, symmetry / 2].
    diff_rot = 0
    if symmetry > 0:
        yaw0 = np.array(utils.quatXYZW_to_eulerXYZ(pose0[1]))[2]
        yaw1 = np.array(utils.quatXYZW_to_eulerXYZ(pose1[1]))[2]
        diff_rot = np.abs(yaw0 - yaw1) % symmetry
        diff_rot = (symmetry - diff_rot
                    if diff_rot > (symmetry / 2) else diff_rot)

    return (dist_pos < self.pos_eps) and (diff_rot < self.rot_eps)
def get_sample(self, dataset, augment=True):
    """Fetch a dataset sample and derive pick/place training labels.

    Args:
      dataset: object whose `sample()` returns a pair; the first element
        unpacks as `(obs, act, _, _)`.
      augment: if True, the image and both pixels go through
        `utils.perturb`.

    Returns:
      `(img, p0, p0_theta, p1, p1_theta)`. `img` comes from
      `self.get_image(obs)`, `p0_theta` is always 0 and `p1_theta` is the
      place yaw relative to the pick yaw.
    """
    first, _ = dataset.sample()
    obs, act, _, _ = first

    # `obs` is the raw observation; `get_image` produces the model input.
    img = self.get_image(obs)

    def to_pixel(xyz):
        return utils.xyz_to_pix(xyz, self.bounds, self.pix_size)

    def negated_yaw(quat_xyzw):
        return -np.float32(utils.quatXYZW_to_eulerXYZ(quat_xyzw)[2])

    # Training labels from the demonstrated action.
    pick_xyz, pick_quat = act['pose0']
    place_xyz, place_quat = act['pose1']
    p0 = to_pixel(pick_xyz)
    pick_theta = negated_yaw(pick_quat)
    p1 = to_pixel(place_xyz)
    p1_theta = negated_yaw(place_quat) - pick_theta
    p0_theta = 0

    # Data augmentation.
    if augment:
        img, _, (p0, p1), _ = utils.perturb(img, [p0, p1])

    return img, p0, p0_theta, p1, p1_theta
def get_sample(self, dataset, augment=True):
    """Return one `(img, p0, p0_theta, p1, p1_theta)` training tuple.

    The sample comes from `dataset.sample()`. Poses `act['pose0']` and
    `act['pose1']` are mapped to pixels with `utils.xyz_to_pix`. The place
    angle is reported relative to the pick angle and the pick angle is
    reported as 0. With `augment` true, the image and pixels are passed
    through `utils.perturb`.
    """
    (obs, act, _, _), _ = dataset.sample()
    img = self.get_image(obs)

    pose0, pose1 = act['pose0'], act['pose1']
    xyz0, quat0 = pose0
    xyz1, quat1 = pose1

    p0 = utils.xyz_to_pix(xyz0, self.bounds, self.pix_size)
    yaw0 = -np.float32(utils.quatXYZW_to_eulerXYZ(quat0)[2])
    p1 = utils.xyz_to_pix(xyz1, self.bounds, self.pix_size)
    yaw1 = -np.float32(utils.quatXYZW_to_eulerXYZ(quat1)[2])

    # Only the relative rotation between pick and place is kept.
    p0_theta, p1_theta = 0, yaw1 - yaw0

    if augment:
        augmented = utils.perturb(img, [p0, p1])
        img, _, (p0, p1), _ = augmented

    return img, p0, p0_theta, p1, p1_theta
def get_six_dof_act(self, transform_params, heightmap, pose0, pose1):
    """Adjust SE(3) poses via the in-plane SE(2) augmentation transform.

    Args:
      transform_params: indexable with at least three items, forwarded to
        `utils.get_se3_from_image_transform`; None means no augmentation.
      heightmap: heightmap forwarded to that helper.
      pose0: pick pose as `(position, quaternion_xyzw)`.
      pose1: place pose as `(position, quaternion_xyzw)`.

    Returns:
      Array `[x, y, theta, roll, pitch, z]`. The three angles come from
      the place frame after re-expressing it for a zero-rotation pick,
      each divided by `self.theta_scale` (theta is also negated).
    """

    def _pose_to_matrix(position, quat_xyzw):
        """Build a 4x4 homogeneous matrix from a position and quaternion."""
        quat_wxyz = (quat_xyzw[3], quat_xyzw[0], quat_xyzw[1], quat_xyzw[2])
        # Writing the translation into column 3 of the quat2mat result
        # fails when that result is a bare 3x3 rotation (assumed
        # transforms3d-style API -- confirm). Slicing the rotation block
        # also tolerates a 4x4 result.
        rotation = np.asarray(quaternions.quat2mat(quat_wxyz))
        matrix = np.eye(4)
        matrix[0:3, 0:3] = rotation[0:3, 0:3]
        matrix[0:3, 3] = np.asarray(position)
        return matrix

    p1_position, p1_rotation = pose1[0], pose1[1]
    p0_position, p0_rotation = pose0[0], pose0[1]

    if transform_params is not None:
        t_world_center, t_world_centernew = (
            utils.get_se3_from_image_transform(
                transform_params[0], transform_params[1],
                transform_params[2], heightmap, self.bounds,
                self.pixel_size))
        t_worldnew_world = t_world_centernew @ np.linalg.inv(t_world_center)
    else:
        t_worldnew_world = np.eye(4)

    t_worldnew_p1 = t_worldnew_world @ _pose_to_matrix(
        p1_position, p1_rotation)
    t_worldnew_p0 = t_worldnew_world @ _pose_to_matrix(
        p0_position, p0_rotation)

    # PICK FRAME with its rotation reset to identity.
    t_worldnew_p0theta0 = t_worldnew_p0.copy()
    t_worldnew_p0theta0[0:3, 0:3] = np.eye(3)

    # PLACE FRAME, adjusted for this 0 rotation on pick.
    t_p0_p0theta0 = np.linalg.inv(t_worldnew_p0) @ t_worldnew_p0theta0
    t_worldnew_p1theta0 = t_worldnew_p1 @ t_p0_p0theta0

    # Convert the adjusted place rotation to Euler angles; mat2quat
    # receives only the 3x3 rotation block.
    q = quaternions.mat2quat(t_worldnew_p1theta0[0:3, 0:3])
    p1_rotation = (q[1], q[2], q[3], q[0])
    p1_euler = utils.quatXYZW_to_eulerXYZ(p1_rotation)

    roll_scaled = p1_euler[0] / self.theta_scale
    pitch_scaled = p1_euler[1] / self.theta_scale
    p1_theta_scaled = -p1_euler[2] / self.theta_scale

    # NOTE(review): x, y, z are read from the untransformed `pose1`
    # position -- confirm this is intended when augmentation is active.
    x = p1_position[0]
    y = p1_position[1]
    z = p1_position[2]

    return np.array([x, y, p1_theta_scaled, roll_scaled, pitch_scaled, z])
def get_sample_place(self, dataset, augment=True):
    """Build one training tuple using the place-side image and bounds.

    Args:
      dataset: object whose `sample()` returns a pair; the first element
        unpacks as `(obs, act, _, _)`.
      augment: if True, the image and both pixels go through
        `utils.perturb`.

    Returns:
      `(img_place, p0_place, p0_theta_place, p1_place, p1_theta_place)`.
      Pixels are computed with `self.bounds_place`, the pick angle is
      always 0 and the place angle is relative to the pick angle.
    """
    step, _ = dataset.sample()
    obs, act, _, _ = step
    img_place = self.get_image_place(obs)

    # Training labels from the demonstrated action.
    pick_xyz, pick_quat = act['pose0']
    place_xyz, place_quat = act['pose1']

    p0_place = utils.xyz_to_pix(pick_xyz, self.bounds_place, self.pix_size)
    pick_yaw = -np.float32(utils.quatXYZW_to_eulerXYZ(pick_quat)[2])
    p1_place = utils.xyz_to_pix(place_xyz, self.bounds_place, self.pix_size)
    place_yaw = -np.float32(utils.quatXYZW_to_eulerXYZ(place_quat)[2])

    p1_theta_place = place_yaw - pick_yaw
    p0_theta_place = 0

    # Data augmentation.
    if augment:
        perturbed = utils.perturb(img_place, [p0_place, p1_place])
        img_place, _, (p0_place, p1_place), _ = perturbed

    return img_place, p0_place, p0_theta_place, p1_place, p1_theta_place
def reset(self, env):
    """Reset the task: add a fixed corner and a box, then register the goal.

    A randomly sized box and a matching corner are added to `env` from
    filled URDF templates; each generated URDF file is deleted after it is
    added. One goal is appended to `self.goals` listing four acceptable
    target poses for the box.

    Args:
      env: environment passed to the parent `reset`, to
        `self.get_random_pose`, and used for `add_object`.
    """
    super().reset(env)

    # Randomly shaped box.
    box_size = self.get_random_size(0.05, 0.15, 0.05, 0.15, 0.01, 0.06)
    half_x = box_size[0] / 2
    half_y = box_size[1] / 2

    # Corner, sized from the box footprint.
    corner_fields = {
        'DIMX': (half_x - 0.025 + 0.0025, half_x + 0.0025),
        'DIMY': (half_y + 0.0025, half_y - 0.025 + 0.0025),
    }
    corner_urdf = self.fill_template(
        'corner/corner-template.urdf', corner_fields)
    corner_pose = self.get_random_pose(env, (box_size[0], box_size[1], 0))
    env.add_object(corner_urdf, corner_pose, 'fixed')
    os.remove(corner_urdf)

    # Alternative placing poses: the corner pose turned by pi about z ...
    corner_yaw = utils.quatXYZW_to_eulerXYZ(corner_pose[1])[2]
    flipped_rot = utils.eulerXYZ_to_quatXYZW((0, 0, corner_yaw + np.pi))
    pose1 = (corner_pose[0], flipped_rot)

    # ... and two poses offset in the corner frame, turned by pi/2 and
    # 3*pi/2 about z.
    alt_pos = (half_x - half_y, half_y - half_x, 0)
    quarter_turn = utils.eulerXYZ_to_quatXYZW((0, 0, np.pi / 2))
    three_quarter_turn = utils.eulerXYZ_to_quatXYZW((0, 0, 3 * np.pi / 2))
    pose2 = utils.multiply(corner_pose, (alt_pos, quarter_turn))
    pose3 = utils.multiply(corner_pose, (alt_pos, three_quarter_turn))

    # Box.
    box_urdf = self.fill_template('box/box-template.urdf', {'DIM': box_size})
    box_pose = self.get_random_pose(env, box_size)
    box_id = env.add_object(box_urdf, box_pose)
    os.remove(box_urdf)
    self.color_random_brown(box_id)

    # Goal: box is aligned with corner (1 of 4 possible poses).
    target_poses = [corner_pose, pose1, pose2, pose3]
    goal = ([(box_id, (2 * np.pi, None))], np.int32([[1, 1, 1, 1]]),
            target_poses, False, True, 'pose', None, 1)
    self.goals.append(goal)
def get_six_dof(self,
                transform_params,
                heightmap,
                pose0,
                pose1,
                augment=True):
    """Adjust SE(3) poses via the in-plane SE(2) augmentation transform.

    Args:
      transform_params: iterable unpacked as the leading arguments of
        `utils.get_se3_from_image_transform`; only read when `augment`.
      heightmap: heightmap forwarded to that helper.
      pose0: pick pose as `(position, quaternion_xyzw)`.
      pose1: place pose as `(position, quaternion_xyzw)`.
      augment: if True, apply the augmentation transform; otherwise the
        identity is used.

    Returns:
      `(p0_theta, p1_theta, z, roll, pitch)`. `p0_theta` is always 0;
      `p1_theta`, `roll` and `pitch` come from the place frame after
      re-expressing it for a zero-rotation pick; `z` is the third
      component of the original `pose1` position.
    """
    debug_visualize = False  # Flip locally to display the frames below.

    def _pose_to_matrix(position, quat_xyzw):
        """Build a 4x4 homogeneous matrix from a position and quaternion."""
        quat_wxyz = (quat_xyzw[3], quat_xyzw[0], quat_xyzw[1], quat_xyzw[2])
        # Writing the translation into column 3 of the quat2mat result
        # fails when that result is a bare 3x3 rotation (assumed
        # transforms3d-style API -- confirm). Slicing the rotation block
        # also tolerates a 4x4 result.
        rotation = np.asarray(quaternions.quat2mat(quat_wxyz))
        matrix = np.eye(4)
        matrix[0:3, 0:3] = rotation[0:3, 0:3]
        matrix[0:3, 3] = np.asarray(position)
        return matrix

    def _show_frame(label, transform, radius):
        """Draw a labelled frame in the debug visualizer (debug only)."""
        if not debug_visualize:
            return
        utils.make_frame(self.vis, label, h=0.05, radius=radius, o=1.0)
        self.vis[label].set_transform(transform)

    p1_position, p1_rotation = pose1[0], pose1[1]
    p0_position, p0_rotation = pose0[0], pose0[1]

    if debug_visualize:
        self.vis = utils.create_visualizer()
        self.transport_model.vis = self.vis

    if augment:
        t_world_center, t_world_centernew = (
            utils.get_se3_from_image_transform(
                *transform_params, heightmap, self.bounds, self.pixel_size))
        _show_frame('t_world_center', t_world_center, 0.0012)
        _show_frame('t_world_centernew', t_world_centernew, 0.0012)
        t_worldnew_world = t_world_centernew @ np.linalg.inv(t_world_center)
    else:
        t_worldnew_world = np.eye(4)

    t_world_p1 = _pose_to_matrix(p1_position, p1_rotation)
    t_worldnew_p1 = t_worldnew_world @ t_world_p1

    t_world_p0 = _pose_to_matrix(p0_position, p0_rotation)
    t_worldnew_p0 = t_worldnew_world @ t_world_p0

    _show_frame('t_worldnew_p1', t_worldnew_p1, 0.0012)
    _show_frame('t_world_p1', t_world_p1, 0.0012)
    _show_frame('t_worldnew_p0-0thetaoriginally', t_worldnew_p0, 0.0021)

    # PICK FRAME, using 0 rotation due to suction rotational symmetry.
    t_worldnew_p0theta0 = t_worldnew_p0.copy()
    t_worldnew_p0theta0[0:3, 0:3] = np.eye(3)
    _show_frame('PICK', t_worldnew_p0theta0, 0.0021)

    # PLACE FRAME, adjusted for this 0 rotation on pick.
    t_p0_p0theta0 = np.linalg.inv(t_worldnew_p0) @ t_worldnew_p0theta0
    t_worldnew_p1theta0 = t_worldnew_p1 @ t_p0_p0theta0
    _show_frame('PLACE', t_worldnew_p1theta0, 0.0021)

    # Convert the adjusted place rotation to Euler angles; mat2quat
    # receives only the 3x3 rotation block.
    q = quaternions.mat2quat(t_worldnew_p1theta0[0:3, 0:3])
    p1_rotation = (q[1], q[2], q[3], q[0])
    p1_euler = utils.quatXYZW_to_eulerXYZ(p1_rotation)

    roll = p1_euler[0]
    pitch = p1_euler[1]
    p1_theta = -p1_euler[2]
    p0_theta = 0
    z = p1_position[2]

    return p0_theta, p1_theta, z, roll, pitch
def act(self, obs, info, compute_error=False, gt_act=None):
    """Run inference and return the best action for an observation.

    Args:
      obs: observation passed to `self.get_heightmap`.
      info: unused.
      compute_error: if True, record pixel and angle errors against
        `gt_act` through the `self.*_error` metrics and return None.
      gt_act: ground-truth action; read at `['params']['pose0']` and
        `['params']['pose1']` when `compute_error` is True.

    Returns:
      None when `compute_error` is True; otherwise a dict with 'pose0' and
      'pose1', each a `(position, rotation)` pair of arrays.
    """
    # Heightmap from RGB-D images.
    colormap, heightmap = self.get_heightmap(obs, self.camera_config)

    # Colour channels followed by three copies of the height channel.
    height = heightmap[Ellipsis, None]
    input_image = np.concatenate((colormap, height, height, height), axis=2)

    # Attention model: the peak gives the pick pixel and rotation bin.
    attention = self.attention_model.forward(input_image)
    pick_peak = np.unravel_index(np.argmax(attention), shape=attention.shape)
    p0_pixel = pick_peak[:2]
    p0_theta = pick_peak[2] * (2 * np.pi / attention.shape[2])

    # Transport and roll/pitch/z models, both conditioned on the pick pixel.
    transport = self.transport_model.forward(input_image, p0_pixel)
    _, z, roll, pitch = self.rpz_model.forward(input_image, p0_pixel)
    place_peak = np.unravel_index(np.argmax(transport), shape=transport.shape)

    # Read z / roll / pitch activations at the transport peak and send
    # each through its regressor.
    at_peak = (slice(None), place_peak[0], place_peak[1], place_peak[2])
    z_best = self.rpz_model.z_regressor(z[at_peak][Ellipsis, None])[0, 0]
    roll_best = self.rpz_model.roll_regressor(
        roll[at_peak][Ellipsis, None])[0, 0]
    pitch_best = self.rpz_model.pitch_regressor(
        pitch[at_peak][Ellipsis, None])[0, 0]

    p1_pixel = place_peak[:2]
    p1_theta = place_peak[2] * (2 * np.pi / transport.shape[2])

    # Pixels to end effector poses; the place height is the regressed z.
    p0_position = utils.pix_to_xyz(p0_pixel, heightmap, self.bounds,
                                   self.pixel_size)
    p1_xyz = utils.pix_to_xyz(p1_pixel, heightmap, self.bounds,
                              self.pixel_size)
    p1_position = (p1_xyz[0], p1_xyz[1], z_best)

    p0_rotation = utils.eulerXYZ_to_quatXYZW((0, 0, -p0_theta))
    p1_rotation = utils.eulerXYZ_to_quatXYZW(
        (roll_best, pitch_best, -p1_theta))

    if not compute_error:
        return {
            'pose0': (np.asarray(p0_position), np.asarray(p0_rotation)),
            'pose1': (np.asarray(p1_position), np.asarray(p1_rotation))
        }

    def wrapped_abs_degrees(delta_rad):
        # Absolute angular difference in degrees, wrapped into [0, 180].
        return abs((np.rad2deg(delta_rad) + 180) % 360 - 180)

    gt_params = gt_act['params']
    gt_p0_position, gt_p0_rotation = gt_params['pose0']
    gt_p1_position, gt_p1_rotation = gt_params['pose1']

    gt_p0_pixel = np.array(
        utils.xyz_to_pix(gt_p0_position, self.bounds, self.pixel_size))
    gt_p1_pixel = np.array(
        utils.xyz_to_pix(gt_p1_position, self.bounds, self.pixel_size))
    self.p0_pixel_error(np.linalg.norm(gt_p0_pixel - np.array(p0_pixel)))
    self.p1_pixel_error(np.linalg.norm(gt_p1_pixel - np.array(p1_pixel)))

    gt_p0_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(gt_p0_rotation)[2])
    gt_p1_theta = -np.float32(utils.quatXYZW_to_eulerXYZ(gt_p1_rotation)[2])
    self.p0_theta_error(wrapped_abs_degrees(gt_p0_theta - p0_theta))
    self.p1_theta_error(wrapped_abs_degrees(gt_p1_theta - p1_theta))

    return None
def act(obs, info):  # pylint: disable=unused-argument
    """Compute a scripted pick-and-place action for the current goal step.

    Reads `self`, `env` and `p` from the enclosing scope. Both arguments
    are unused.

    Returns:
      A dict with 'pose0' (pick) and 'pose1' (place), each a
      `(position, quaternion)` pair of arrays. Returns None when no object
      to pick is visible in the segmentation mask, after clearing
      `self.goals`.
    """
    # The oracle works from the height map and segmentation mask returned
    # by `get_true_image`.
    _, hmap, obj_mask = self.get_true_image(env)

    # Unpack next goal step.
    objs, matches, targs, replace, rotations, _, _, _ = self.goals[0]

    # Match objects to targets without replacement.
    if not replace:
        # Work on a copy so the stored match matrix is untouched.
        matches = matches.copy()

        # Zero out rows/columns of objects already sitting on a target.
        for row, (object_id, (symmetry, _)) in enumerate(objs):
            pose = p.getBasePositionAndOrientation(object_id)
            for col in np.argwhere(matches[row, :]).reshape(-1):
                if self.is_match(pose, targs[col], symmetry):
                    matches[row, :] = 0
                    matches[:, col] = 0

    # For each object, find its nearest remaining target.
    nn_dists = []
    nn_targets = []
    for row, (object_id, (_, _)) in enumerate(objs):
        xyz, _ = p.getBasePositionAndOrientation(object_id)
        candidates = np.argwhere(matches[row, :]).reshape(-1)

        # Objects with no remaining target get distance 0 and target -1.
        if len(candidates) == 0:
            nn_dists.append(0)
            nn_targets.append(-1)
            continue

        targets_xyz = np.float32([targs[j][0] for j in candidates])
        dists = np.linalg.norm(
            targets_xyz - np.float32(xyz).reshape(1, 3), axis=1)
        nearest = np.argmin(dists)
        nn_dists.append(dists[nearest])
        nn_targets.append(candidates[nearest])

    # Farthest-from-target first; drop entries with zero distance.
    order = [i for i in np.argsort(nn_dists)[::-1] if nn_dists[i] > 0]

    # Take the first candidate that is visible in the mask.
    pick_mask = None
    for pick_i in order:
        pick_mask = np.uint8(obj_mask == objs[pick_i][0])
        if np.sum(pick_mask) > 0:
            break

    # Trigger task reset if no object is visible.
    if pick_mask is None or np.sum(pick_mask) == 0:
        self.goals = []
        print('Object for pick is not visible. Skipping demonstration.')
        return

    # Picking pose: a pixel sampled from the object's mask, identity
    # quaternion.
    pick_pix = utils.sample_distribution(np.float32(pick_mask))
    pick_pos = utils.pix_to_xyz(pick_pix, hmap, self.bounds, self.pix_size)
    pick_pose = (np.asarray(pick_pos), np.asarray((0, 0, 0, 1)))

    # Placing pose.
    targ_pose = targs[nn_targets[pick_i]]  # pylint: disable=undefined-loop-variable
    obj_pose = p.getBasePositionAndOrientation(objs[pick_i][0])  # pylint: disable=undefined-loop-variable
    if not self.sixdof:
        # Keep only the rotation about z.
        yaw = utils.quatXYZW_to_eulerXYZ(obj_pose[1])[2]
        obj_pose = (obj_pose[0], utils.eulerXYZ_to_quatXYZW((0, 0, yaw)))

    # Carry the pick-to-object offset over to the target pose.
    obj_to_pick = utils.multiply(utils.invert(pick_pose), obj_pose)
    place_pose = utils.multiply(targ_pose, utils.invert(obj_to_pick))

    # Without end effector rotation, place with the identity quaternion.
    if not rotations:
        place_pose = (place_pose[0], (0, 0, 0, 1))

    place_pose = (np.asarray(place_pose[0]), np.asarray(place_pose[1]))
    return {'pose0': pick_pose, 'pose1': place_pose}