Example #1
def icp_refinement(pts_tgt, obj_model, rot_pred, tra_pred, cam_K, ren):
    centroid_tgt = np.array([
        np.mean(pts_tgt[:, 0]),
        np.mean(pts_tgt[:, 1]),
        np.mean(pts_tgt[:, 2])
    ])
    if (tra_pred[2] < 300 or tra_pred[2] > 5000):
        # when the estimated translation is implausible (outside 300-5000 mm), use the target centroid
        tra_pred = centroid_tgt * 1000

    # OpenGL renderer (a bit slower, but more precise); gpu_rendering is a module-level flag
    if (gpu_rendering):
        img_init, depth_init = render_obj_gpu(obj_model, rot_pred,
                                              tra_pred / 1000, cam_K, ren)
    else:
        img_init, depth_init = render_obj(obj_model, rot_pred,
                                          tra_pred / 1000, cam_K, ren)
    init_mask = depth_init > 0
    bbox_init = get_bbox_from_mask(init_mask > 0)
    tf = np.eye(4)
    if (bbox_init[2] - bbox_init[0] < 10 or bbox_init[3] - bbox_init[1] < 10):
        return tf, -1
    if (np.sum(init_mask) < 10):
        return tf, -1
    points_src = np.zeros(
        (bbox_init[2] - bbox_init[0], bbox_init[3] - bbox_init[1], 6),
        np.float32)
    points_src[:, :, :3] = getXYZ(depth_init, cam_K[0, 0], cam_K[1, 1],
                                  cam_K[0, 2], cam_K[1, 2], bbox_init)
    points_src[:, :, 3:] = get_normal(depth_init,
                                      fx=cam_K[0, 0],
                                      fy=cam_K[1, 1],
                                      cx=cam_K[0, 2],
                                      cy=cam_K[1, 2],
                                      refine=True,
                                      bbox=bbox_init)
    points_src = points_src[init_mask[bbox_init[0]:bbox_init[2],
                                      bbox_init[1]:bbox_init[3]] > 0]

    # adjust the initial translation using the centroids of the visible points
    centroid_src = np.array([
        np.mean(points_src[:, 0]),
        np.mean(points_src[:, 1]),
        np.mean(points_src[:, 2])
    ])
    trans_adjust = centroid_tgt - centroid_src
    tra_pred = tra_pred + trans_adjust * 1000
    points_src[:, :3] += trans_adjust

    icp_fnc = cv2.ppf_match_3d_ICP(100, tolerence=0.05, numLevels=4)  # "tolerence" is OpenCV's spelling
    retval, residual, pose = icp_fnc.registerModelToScene(
        points_src.reshape(-1, 6), pts_tgt.reshape(-1, 6))

    tf[:3, :3] = rot_pred
    tf[:3, 3] = tra_pred / 1000  # mm -> m
    tf = np.matmul(pose, tf)
    return tf, residual
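
A note on the translation adjustment in the middle of this example: before ICP runs, the rendered (source) cloud is shifted so that its centroid coincides with the centroid of the observed (target) cloud, which removes most of the translation error up front. Below is a minimal self-contained sketch of that step; the synthetic clouds and the 120 mm offset are invented for illustration:

import numpy as np

rng = np.random.default_rng(0)
pts = rng.uniform(-0.05, 0.05, (500, 3)).astype(np.float32)
points_src = pts.copy()                        # rendered cloud at the predicted pose
pts_tgt = pts + np.float32([0.12, 0.0, 0.0])   # observed cloud; the 120 mm offset is invented

# the same centroid alignment as in icp_refinement above
trans_adjust = pts_tgt.mean(axis=0) - points_src.mean(axis=0)
points_src += trans_adjust

# centroids now coincide, so ICP only has to resolve the remaining rotation
print(np.abs(points_src.mean(axis=0) - pts_tgt.mean(axis=0)).max())  # ~0
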
Example #2
 def icp_refinement(self, pts_tgt, obj_model, rot_pred, tra_pred):
     centroid_tgt = np.array([
         np.mean(pts_tgt[:, 0]),
         np.mean(pts_tgt[:, 1]),
         np.mean(pts_tgt[:, 2])
     ])
     if (tra_pred[2] < 300 or tra_pred[2] > 5000):
         # when the estimated translation is implausible, use the centroid of the target points instead
         tra_pred = centroid_tgt * 1000
     img_init, depth_init = self.render_obj_pyrender(
         obj_model, rot_pred, tra_pred / 1000)
     init_mask = depth_init > 0
     bbox_init = get_bbox_from_mask(init_mask > 0)
     tf = np.eye(4)  # identity fallback for the early-return failure cases below
     if (bbox_init[2] - bbox_init[0] < 10
             or bbox_init[3] - bbox_init[1] < 10):
         return tf, -1
     if (np.sum(init_mask) < 10):
         return tf, -1
     points_src = np.zeros(
         (bbox_init[2] - bbox_init[0], bbox_init[3] - bbox_init[1], 6),
         np.float32)
     points_src[:, :, :3] = getXYZ(depth_init, self.camK[0, 0],
                                   self.camK[1, 1], self.camK[0, 2],
                                   self.camK[1, 2], bbox_init)
     points_src[:, :, 3:] = get_normal(depth_init,
                                       fx=self.camK[0, 0],
                                       fy=self.camK[1, 1],
                                       cx=self.camK[0, 2],
                                       cy=self.camK[1, 2],
                                       refine=True,
                                       bbox=bbox_init)
     points_src = points_src[init_mask[bbox_init[0]:bbox_init[2],
                                       bbox_init[1]:bbox_init[3]] > 0]
     centroid_src = np.array([
         np.mean(points_src[:, 0]),
         np.mean(points_src[:, 1]),
         np.mean(points_src[:, 2])
     ])
     trans_adjust = centroid_tgt - centroid_src
     tra_pred = tra_pred + trans_adjust * 1000
     points_src[:, :3] += trans_adjust
     icp_fnc = cv2.ppf_match_3d_ICP(100, tolerence=0.05, numLevels=5)  # "tolerence" is OpenCV's spelling
     retval, residual, pose = icp_fnc.registerModelToScene(
         points_src.reshape(-1, 6), pts_tgt.reshape(-1, 6))
     tf[:3, :3] = rot_pred
     tf[:3, 3] = tra_pred / 1000  # mm -> m
     tf = np.matmul(pose, tf)
     return tf, residual
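
Both examples drive OpenCV's ICP from the ppf_match_3d module (shipped in opencv-contrib-python; the binding really does spell the parameter "tolerence"). The sketch below exercises the same call on a synthetic cloud with a small known offset; the sphere geometry, the 4 mm error, and the parameter values are illustrative only:

import numpy as np
import cv2  # requires opencv-contrib-python for cv2.ppf_match_3d_ICP

rng = np.random.default_rng(1)
dirs = rng.normal(size=(1000, 3))
dirs /= np.linalg.norm(dirs, axis=1, keepdims=True)
# Nx6 float32 layout expected by registerModelToScene: x, y, z, nx, ny, nz
scene = np.hstack([0.05 * dirs, dirs]).astype(np.float32)  # 5 cm sphere with outward normals
model = scene.copy()
model[:, :3] += np.float32([0.004, 0.0, 0.0])              # invented 4 mm registration error

icp = cv2.ppf_match_3d_ICP(100, tolerence=0.005, numLevels=4)
retval, residual, pose = icp.registerModelToScene(model, scene)
print(residual)     # small once converged
print(pose[:3, 3])  # roughly (-0.004, 0, 0): the recovered correction

# as in both examples, the returned pose left-multiplies the initial model-to-camera transform
init_tf = np.eye(4)
refined_tf = np.matmul(pose, init_tf)
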
Example #3
    depth_t = inout.load_depth(depth_path) / 1000 * depth_scale  # depth in meters
    depth_t_zero_nan = np.nan_to_num(depth_t)
    t1 = time.time()
    inst_count_est = np.zeros((len(inst_counts)))
    inst_count_pred = np.zeros((len(inst_counts)))
    inst_count_good = np.zeros((len(inst_counts)))

    # keep pixels with valid depth (0.2-2.2 m) or no depth reading at all; dim the rest
    depth_valid = np.logical_and(depth_t > 0.2, depth_t < 2.2)
    rgb_valid = np.logical_or(depth_valid, depth_t_zero_nan == 0)
    image_t = image_t.astype(np.float32)
    image_t[np.invert(rgb_valid)] = 0.1 * image_t[np.invert(rgb_valid)]

    points_tgt = np.zeros((depth_t.shape[0], depth_t.shape[1], 6), np.float32)
    points_tgt[:, :, :3] = getXYZ(depth_t,
                                  fx=cam_K[0, 0],
                                  fy=cam_K[1, 1],
                                  cx=cam_K[0, 2],
                                  cy=cam_K[1, 2])
    points_tgt[:, :, 3:] = get_normal(depth_t,
                                      fx=cam_K[0, 0],
                                      fy=cam_K[1, 1],
                                      cx=cam_K[0, 2],
                                      cy=cam_K[1, 2],
                                      refine=True)

    if (detect_type == 'rcnn'):
        rois, obj_orders, obj_ids, scores, masks = get_rcnn_detection(
            image_t, model)
    elif (detect_type == 'retinanet'):
        rois, obj_orders, obj_ids, scores = get_retinanet_detection(
            image_t, model)
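
getXYZ and get_normal are helpers from the surrounding project; getXYZ is presumably the standard pinhole back-projection of a depth map into an organized XYZ image. Here is a self-contained sketch of that operation, with made-up intrinsics and a flat synthetic depth map:

import numpy as np

def backproject(depth, fx, fy, cx, cy):
    # pinhole model: a pixel (u, v) with depth z maps to
    # x = (u - cx) * z / fx, y = (v - cy) * z / fy
    v, u = np.indices(depth.shape)
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    return np.dstack([x, y, depth]).astype(np.float32)

cam_K = np.array([[572.4, 0.0, 325.3],
                  [0.0, 573.6, 242.0],
                  [0.0, 0.0, 1.0]])              # illustrative intrinsics
depth_t = np.full((480, 640), 0.5, np.float32)   # flat synthetic depth at 0.5 m
xyz = backproject(depth_t, cam_K[0, 0], cam_K[1, 1], cam_K[0, 2], cam_K[1, 2])
print(xyz.shape)  # (480, 640, 3), same layout as points_tgt[:, :, :3] above
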
Example #4
    def callback(self, r_image):
        self.sub.unregister()
        timeout = 1
        t_spend = 0
        if (self.icp):
            while not (self.have_depth):
                time.sleep(0.01)
                t_spend += 0.01
                if (t_spend > timeout):
                    break
            self.sub_depth.unregister()

        with self.graph.as_default():
            depth_t = np.zeros((self.im_height, self.im_width))
            if (self.have_depth):
                depth_t = np.copy(self.depth_img)
                depth_t = np.nan_to_num(depth_t)
                depth_valid = np.logical_and(depth_t > 0.2, depth_t < 3)
                if (np.max(depth_t) == 0):
                    self.have_depth = False
                else:
                    points_tgt = np.zeros(
                        (depth_t.shape[0], depth_t.shape[1], 6), np.float32)
                    points_tgt[:, :, :3] = getXYZ(depth_t,
                                                  fx=self.camK[0, 0],
                                                  fy=self.camK[1, 1],
                                                  cx=self.camK[0, 2],
                                                  cy=self.camK[1, 2])
                    points_tgt[:, :, 3:] = get_normal(depth_t,
                                                      fx=self.camK[0, 0],
                                                      fy=self.camK[1, 1],
                                                      cx=self.camK[0, 2],
                                                      cy=self.camK[1, 2],
                                                      refine=True)
            data = ros_numpy.numpify(r_image)
            image = np.copy(data)
            bbox_pred = np.zeros((4), int)
            rois, obj_orders, obj_ids, scores, masks = self.get_rcnn_detection(
                image)
            result_scores = []
            result_poses = []
            result_poses_before = []
            result_ids = []
            result_bbox = []
            img_detection = np.copy(image)
            img_pose = np.copy(image)
            if (self.icp): img_pose_noicp = np.copy(image)

            for r_id, roi in enumerate(rois):
                if (roi[0] == -1 and roi[1] == -1):
                    continue
                obj_id = obj_ids[r_id]

                if (detect_type == 'rcnn'):
                    mask_from_detect = masks[:, :, r_id]
                    r_ = int(self.colors[obj_orders[r_id], 0])
                    g_ = int(self.colors[obj_orders[r_id], 1])
                    b_ = int(self.colors[obj_orders[r_id], 2])
                    img_detection[mask_from_detect, 0] = np.minimum(
                        255, img_detection[mask_from_detect, 0] + 0.8 * r_)
                    img_detection[mask_from_detect, 1] = np.minimum(
                        255, img_detection[mask_from_detect, 1] + 0.8 * g_)
                    img_detection[mask_from_detect, 2] = np.minimum(
                        255, img_detection[mask_from_detect, 2] + 0.8 * b_)
                    cv2.rectangle(img_detection, (roi[1], roi[0]),
                                  (roi[3], roi[2]), (r_, g_, b_), 2)
                    cv2.putText(img_detection, '{}'.format(obj_id),
                                (roi[1], roi[0]), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                (255, 255, 255), 2, 1)

                if not (obj_id in self.target_objs):
                    continue

                pix2pose_id = self.target_objs.index(obj_id)
                _, mask_pred, rot_pred, tra_pred, frac_inlier, _ = \
                    self.obj_pix2pose[pix2pose_id].est_pose(image, roi.astype(int))
                if (frac_inlier == -1):
                    continue

                pred_tf_ori = np.eye(4)
                pred_tf_ori[:3, :3] = rot_pred
                pred_tf_ori[:3, 3] = tra_pred * self.model_scale

                if (detect_type == 'rcnn'):
                    union_mask = np.logical_or(mask_from_detect, mask_pred)
                    union = np.sum(union_mask)
                    if (union == 0):
                        mask_iou = 0
                        score = 0
                    else:
                        mask_iou = np.sum(
                            np.logical_and(mask_from_detect,
                                           mask_pred)) / union
                        score = scores[r_id] * frac_inlier * mask_iou * 1000

                    if (self.have_depth and self.icp):
                        union_mask = np.logical_and(union_mask, depth_valid)
                        pts_tgt = points_tgt[union_mask]
                        tf, residual = self.icp_refinement(
                            pts_tgt, self.obj_models[pix2pose_id], rot_pred,
                            tra_pred)
                        if (residual == -1):
                            continue
                        rot_pred = tf[:3, :3]
                        tra_pred = tf[:3, 3] * 1000

                        score = scores[r_id] / (residual + 0.00001)
                else:
                    score = scores[r_id]
                tra_pred = tra_pred * self.model_scale  # mm -> m
                if (tra_pred[2] < 0.1 or tra_pred[2] > 5):
                    continue
                pred_tf = np.eye(4)
                pred_tf[:3, :3] = rot_pred
                pred_tf[:3, 3] = tra_pred
                # append only after the sanity check so all result lists stay aligned
                result_scores.append(score)
                result_poses.append(pred_tf)
                result_ids.append(pix2pose_id)
                result_bbox.append(roi)
                result_poses_before.append(pred_tf_ori)
            # publish the detection visualization
            self.detect_pub.publish(
                ros_numpy.msgify(ros_image,
                                 img_detection[:, :, ::-1],
                                 encoding='bgr8'))
            # render pose estimation results
            for o_id, tf, score, roi in zip(result_ids, result_poses,
                                            result_scores, result_bbox):
                # ICP and non-ICP results are drawn the same way
                img_pose = self.draw_3d_poses(self.obj_bboxes[o_id], tf,
                                              img_pose)
                cv2.putText(img_pose, '{:.3f}'.format(score), (roi[1], roi[0]),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, 1)
            if (self.icp and self.pub_before_icp):
                for o_id, tf in zip(result_ids, result_poses_before):
                    img_pose_noicp = self.draw_3d_poses(
                        self.obj_bboxes[o_id], tf, img_pose_noicp)
                self.pose_pub_noicp.publish(
                    ros_numpy.msgify(ros_image,
                                     img_pose_noicp[:, :, ::-1],
                                     encoding='bgr8'))
            self.pose_pub.publish(
                ros_numpy.msgify(ros_image,
                                 img_pose[:, :, ::-1],
                                 encoding='bgr8'))

        self.sub = rospy.Subscriber(self.rgb_topic,
                                    ros_image,
                                    self.callback,
                                    queue_size=1)
        if (self.icp):
            self.sub_depth = rospy.Subscriber(self.depth_topic,
                                              ros_image,
                                              self.callback_depth,
                                              queue_size=1)
        self.have_depth = False
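
The scoring inside the detection loop of this example combines the detector confidence with the IoU between the detector mask and the Pix2Pose inlier mask. The fragment below reproduces that rule in isolation; the masks, the confidence of 0.9, and the frac_inlier of 0.7 are all invented for the example:

import numpy as np

mask_from_detect = np.zeros((480, 640), bool)
mask_pred = np.zeros((480, 640), bool)
mask_from_detect[100:200, 100:200] = True  # synthetic detector mask
mask_pred[120:220, 120:220] = True         # synthetic Pix2Pose mask

union = np.sum(np.logical_or(mask_from_detect, mask_pred))
if union == 0:
    mask_iou, score = 0, 0
else:
    mask_iou = np.sum(np.logical_and(mask_from_detect, mask_pred)) / union
    score = 0.9 * 0.7 * mask_iou * 1000  # detector confidence * frac_inlier * IoU, scaled as above
print(mask_iou, score)
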