def test_non_max_suppression_fast(self):
    """Two groups of three overlapping boxes must collapse to two picks."""
    # Three boxes which overlap at 1,1 ...
    cluster_near_one = [
        [1.0, 1.0, 2, 2],
        [1.1, 1.1, 2, 2],
        [1.1, 1.1, 1.9, 1.9],
    ]
    # ... and another three boxes which overlap at 4,4.
    cluster_near_four = [
        [4.0, 4.0, 2, 2],
        [4.1, 4.1, 2, 2],
        [4.1, 4.1, 1.9, 1.9],
    ]
    candidates = np.array(cluster_near_one + cluster_near_four)

    kept = non_max_suppression_fast(candidates)

    # One survivor is expected per cluster.
    self.assertEqual(len(kept), 2)
def detect_hand(self, img_norm):
    """Run the palm detector on one image and describe the first hand found.

    Args:
        img_norm: image array of shape (256, 256, 3) whose values lie in
            [-1, 1].

    Returns:
        A tuple ``(source, keypoints, debug_info)``:

        * ``source`` -- the result of ``self._get_triangle`` for the
          selected detection, shifted by ``self.box_shift`` along the
          line between keypoints 0 and 2.
        * ``keypoints`` -- the 7 initial keypoints of the selected
          detection as an array of (x, y) rows.
        * ``debug_info`` -- dict with the keys ``"detection_candidates"``,
          ``"anchor_candidates"`` and ``"selected_box_id"``.

        ``(None, None, None)`` is returned (after printing
        "No hands found") when no anchor has a probability above 0.5.

    Raises:
        AssertionError: if ``img_norm`` has the wrong shape or its values
            fall outside [-1, 1].
    """
    input_size = 256

    # Check the shape before the value range: min()/max() on a zero-size
    # array raise ValueError inside NumPy, which would hide the intended
    # assertion message for empty input.
    assert img_norm.shape == (input_size, input_size, 3),\
        "img_norm shape must be (256, 256, 3)"
    assert -1 <= img_norm.min() and img_norm.max() <= 1,\
        "img_norm should be in range [-1, 1]"

    # Predict hand location and 7 initial landmarks.
    self.interp_palm.set_tensor(self.in_idx, img_norm[None])
    self.interp_palm.invoke()

    # out_reg shape is [number of anchors, 18].
    # Second dimension 0 - 4 are bounding box offset, width and height:
    #   dx, dy, w, h
    # Second dimension 4 - 18 are 7 hand keypoint x and y coordinates:
    #   x1, y1, x2, y2, ... x7, y7
    out_reg = self.interp_palm.get_tensor(self.out_reg_idx)[0]

    # out_clf shape is [number of anchors]; it is the classification score
    # of whether there is a hand for each anchor box.
    out_clf = self.interp_palm.get_tensor(self.out_clf_idx)[0, :, 0]

    # Keep only the anchors whose hand probability exceeds 0.5.
    probabilities = self._sigm(out_clf)
    detection_mask = probabilities > 0.5
    candidate_detect = out_reg[detection_mask]
    candidate_anchors = self.anchors[detection_mask]
    probabilities = probabilities[detection_mask]

    if candidate_detect.shape[0] == 0:
        print("No hands found")
        return None, None, None

    # Pick the best bounding box with non maximum suppression.
    # The boxes must be moved by the corresponding anchor first; only the
    # four box columns are needed, so only those are copied.
    moved_boxes = candidate_detect[:, :4].copy()
    moved_boxes[:, :2] = candidate_detect[:, :2] + (
        candidate_anchors[:, :2] * input_size)
    box_ids = non_max_suppression_fast(moved_boxes, probabilities)

    # Pick the first detected hand.
    # Could be adapted for multi hand recognition.
    selected_id = box_ids[0]

    # Bounding box offsets, width and height (the offsets are not used
    # below, only the size).
    _dx, _dy, w, h = candidate_detect[selected_id, :4]
    center_wo_offst = candidate_anchors[selected_id, :2] * input_size

    # 7 initial keypoints, stored relative to the anchor center.
    keypoints = center_wo_offst + candidate_detect[selected_id, 4:].reshape(
        -1, 2)
    side = max(w, h) * self.box_enlarge

    # Now we need to move and rotate the detected hand for it to occupy a
    # 256x256 square: the line from the wrist keypoint to the middle finger
    # keypoint should point straight up.
    # TODO: replace triangle with the bbox directly
    source = self._get_triangle(keypoints[0], keypoints[2], side)
    source -= (keypoints[0] - keypoints[2]) * self.box_shift

    debug_info = {
        "detection_candidates": candidate_detect,
        "anchor_candidates": candidate_anchors,
        "selected_box_id": selected_id,
    }
    return source, keypoints, debug_info