def test_non_max_suppression_fast(self):
        """Check that six boxes forming two overlapping groups yield two picks."""
        # First group: three boxes that overlap around (1, 1).
        group_at_one = [
            [1.0, 1.0, 2, 2],
            [1.1, 1.1, 2, 2],
            [1.1, 1.1, 1.9, 1.9],
        ]
        # Second group: three boxes that overlap around (4, 4).
        group_at_four = [
            [4.0, 4.0, 2, 2],
            [4.1, 4.1, 2, 2],
            [4.1, 4.1, 1.9, 1.9],
        ]
        boxes = np.array(group_at_one + group_at_four)

        kept = non_max_suppression_fast(boxes)

        # Exactly two picks are expected out of the six input boxes.
        self.assertEqual(len(kept), 2)
Example #2
0
    def detect_hand(self, img_norm, *, score_threshold=0.5):
        """Detect one hand in a normalised image with the palm model.

        Feeds ``img_norm`` to ``self.interp_palm``, keeps the anchors whose
        score (after ``self._sigm``) is above ``score_threshold``, takes the
        first box returned by ``non_max_suppression_fast`` and derives the
        keypoints and the source triangle from it.

        Args:
            img_norm: image array of shape (256, 256, 3) with all values in
                the range [-1, 1].
            score_threshold: minimum score an anchor must exceed to be kept
                as a candidate. Defaults to 0.5.

        Returns:
            A tuple ``(source, keypoints, debug_info)``:

            * ``source`` -- the result of ``self._get_triangle`` for the
              selected box, shifted along the keypoint 0 -> keypoint 2 line
              by ``self.box_shift``.
            * ``keypoints`` -- the selected detection's keypoints as rows of
              (x, y), offset by the anchor centre.
            * ``debug_info`` -- dict with the candidate detections, the
              candidate anchors and the selected candidate index.

            ``(None, None, None)`` is returned (and "No hands found" is
            printed) when no anchor scores above ``score_threshold``.

        Raises:
            AssertionError: if ``img_norm`` is outside [-1, 1] or does not
                have shape (256, 256, 3).
        """
        assert -1 <= img_norm.min() and img_norm.max() <= 1,\
        "img_norm should be in range [-1, 1]"
        assert img_norm.shape == (256, 256, 3),\
        "img_norm shape must be (256, 256, 3)"

        # Side length of the model input; matches the shape asserted above.
        input_size = 256

        # predict hand location and 7 initial landmarks
        self.interp_palm.set_tensor(self.in_idx, img_norm[None])
        self.interp_palm.invoke()

        # out_reg shape is [number of anchors, 18]
        # Second dimension 0 - 4 are bounding box offset, width and height:
        #   dx, dy, w, h
        # Second dimension 4 - 18 are 7 hand keypoint x and y coordinates:
        #   x1, y1, x2, y2, ... x7, y7
        out_reg = self.interp_palm.get_tensor(self.out_reg_idx)[0]

        # out_clf shape is [number of anchors]
        # it is the classification score if there is a hand for each anchor box
        out_clf = self.interp_palm.get_tensor(self.out_clf_idx)[0, :, 0]

        # keep only the anchors that score above the threshold
        probabilities = self._sigm(out_clf)
        detection_mask = probabilities > score_threshold
        candidate_detect = out_reg[detection_mask]
        candidate_anchors = self.anchors[detection_mask]
        probabilities = probabilities[detection_mask]

        if candidate_detect.shape[0] == 0:
            print("No hands found")
            return None, None, None

        # Pick the best bounding box with non maximum suppression.
        # The boxes must be moved by the corresponding anchor first; only the
        # four box columns are needed, so only those are copied.
        # NOTE(review): anchors appear to be normalised and are scaled to
        # pixels by the input size -- confirm against the anchor file.
        moved_boxes = candidate_detect[:, :4].copy()
        moved_boxes[:, :2] = candidate_detect[:, :2] + (
            candidate_anchors[:, :2] * input_size)
        box_ids = non_max_suppression_fast(moved_boxes, probabilities)

        # Pick the first detected hand. Could be adapted for multi hand recognition
        best_id = box_ids[0]

        # bounding box width and height (the dx, dy offsets are not needed)
        w, h = candidate_detect[best_id, 2:4]
        center_wo_offst = candidate_anchors[best_id, :2] * input_size

        # 7 initial keypoints, stored relative to the anchor centre
        keypoints = center_wo_offst + candidate_detect[best_id, 4:].reshape(
            -1, 2)
        side = max(w, h) * self.box_enlarge

        # now we need to move and rotate the detected hand for it to occupy a
        # 256x256 square
        # line from wrist keypoint to middle finger keypoint
        # should point straight up
        # TODO: replace triangle with the bbox directly
        source = self._get_triangle(keypoints[0], keypoints[2], side)
        source -= (keypoints[0] - keypoints[2]) * self.box_shift

        debug_info = {
            "detection_candidates": candidate_detect,
            "anchor_candidates": candidate_anchors,
            "selected_box_id": best_id,
        }

        return source, keypoints, debug_info