def test_blank_image(self):
    with mp_hands.Hands() as hands:
        image = np.zeros([100, 100, 3], dtype=np.uint8)
        image.fill(255)
        results = hands.process(image)
        self.assertIsNone(results.multi_hand_landmarks)
        self.assertIsNone(results.multi_handedness)
def test_multi_hands(self, static_image_mode, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/hands.jpg')
    hands = mp_hands.Hands(
        static_image_mode=static_image_mode,
        max_num_hands=2,
        min_detection_confidence=0.5)
    image = cv2.flip(cv2.imread(image_path), 1)

    def process_one_frame():
        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        handedness = [
            handedness.classification[0].label
            for handedness in results.multi_handedness
        ]
        self.assertLen(handedness, 2)
        multi_hand_coordinates = []
        for landmarks in results.multi_hand_landmarks:
            self.assertLen(landmarks.landmark, 21)
            x = [landmark.x for landmark in landmarks.landmark]
            y = [landmark.y for landmark in landmarks.landmark]
            hand_coordinates = np.transpose(np.stack((y, x))) * image.shape[0:2]
            multi_hand_coordinates.append(hand_coordinates)
        self.assertLen(multi_hand_coordinates, 2)
        prediction_error = np.abs(
            np.asarray(multi_hand_coordinates) -
            np.asarray(EXPECTED_HAND_COORDINATES_PREDICTION))
        npt.assert_array_less(prediction_error, DIFF_THRESHOLOD)

    for _ in range(num_frames):
        process_one_frame()
    hands.close()
def test_multi_hands(self, static_image_mode, model_complexity, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/hands.jpg')
    image = cv2.imread(image_path)
    with mp_hands.Hands(
            static_image_mode=static_image_mode,
            max_num_hands=2,
            model_complexity=model_complexity,
            min_detection_confidence=0.5) as hands:
        for idx in range(num_frames):
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            self._annotate(image.copy(), results, idx)
            handedness = [
                handedness.classification[0].label
                for handedness in results.multi_handedness
            ]
            multi_hand_coordinates = []
            rows, cols, _ = image.shape
            for landmarks in results.multi_hand_landmarks:
                self.assertLen(landmarks.landmark, 21)
                x = [landmark.x * cols for landmark in landmarks.landmark]
                y = [landmark.y * rows for landmark in landmarks.landmark]
                hand_coordinates = np.column_stack((x, y))
                multi_hand_coordinates.append(hand_coordinates)
            self.assertLen(handedness, 2)
            self.assertLen(multi_hand_coordinates, 2)
            prediction_error = np.abs(
                np.asarray(multi_hand_coordinates) -
                np.asarray(EXPECTED_HAND_COORDINATES_PREDICTION))
            diff_threshold = (LITE_MODEL_DIFF_THRESHOLD if model_complexity == 0
                              else FULL_MODEL_DIFF_THRESHOLD)
            npt.assert_array_less(prediction_error, diff_threshold)
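# The three-parameter signature above suggests the test is driven by absl's
# parameterized decorator; a minimal self-contained sketch follows (the case
# names and parameter tuples here are illustrative assumptions, not
# MediaPipe's actual values):
from absl.testing import parameterized


class HandsParamsSketch(parameterized.TestCase):

    @parameterized.named_parameters(
        # (name, static_image_mode, model_complexity, num_frames)
        ('static_lite', True, 0, 5),
        ('video_full', False, 1, 10))
    def test_signature(self, static_image_mode, model_complexity, num_frames):
        self.assertIn(model_complexity, (0, 1))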
def _process_video(self, model_complexity, video_path, max_num_hands=1,
                   num_landmarks=21, num_dimensions=3):
    # Predict hand landmarks for each frame of the video.
    video_cap = cv2.VideoCapture(video_path)
    landmarks_per_frame = []
    w_landmarks_per_frame = []
    with mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=max_num_hands,
            model_complexity=model_complexity,
            min_detection_confidence=0.5) as hands:
        while True:
            # Get the next frame of the video.
            success, input_frame = video_cap.read()
            if not success:
                break
            # Run the hand tracker on the RGB frame.
            input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
            frame_shape = input_frame.shape
            result = hands.process(image=input_frame)
            # NaN-filled placeholders so undetected hands stay distinguishable.
            frame_landmarks = np.zeros(
                [max_num_hands, num_landmarks, num_dimensions]) * np.nan
            frame_w_landmarks = np.zeros(
                [max_num_hands, num_landmarks, num_dimensions]) * np.nan
            if result.multi_hand_landmarks:
                for idx, landmarks in enumerate(result.multi_hand_landmarks):
                    frame_landmarks[idx] = self._landmarks_list_to_array(
                        landmarks, frame_shape)
            if result.multi_hand_world_landmarks:
                for idx, w_landmarks in enumerate(result.multi_hand_world_landmarks):
                    frame_w_landmarks[idx] = self._world_landmarks_list_to_array(
                        w_landmarks)
            landmarks_per_frame.append(frame_landmarks)
            w_landmarks_per_frame.append(frame_w_landmarks)
    return (np.array(landmarks_per_frame), np.array(w_landmarks_per_frame))
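# _landmarks_list_to_array and _world_landmarks_list_to_array are defined
# elsewhere in the test file; a plausible sketch of them follows (an
# assumption, not the verified helpers): scale normalized x/y by the frame
# size for image-space landmarks, and pass world landmarks through unchanged
# since they are already in metric coordinates.
import numpy as np


def landmarks_list_to_array(landmark_list, image_shape):
    rows, cols, _ = image_shape
    return np.asarray([(lmk.x * cols, lmk.y * rows, lmk.z * cols)
                       for lmk in landmark_list.landmark])


def world_landmarks_list_to_array(landmark_list):
    return np.asarray([(lmk.x, lmk.y, lmk.z)
                       for lmk in landmark_list.landmark])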
def test_invalid_image_shape(self):
    hands = mp_hands.Hands()
    with self.assertRaisesRegex(
            ValueError, 'Input image must contain three channel rgb data.'):
        hands.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
    # Mirror the frame horizontally and downscale it before processing.
    frame = np.flip(frame, axis=1).copy()
    return cv2.resize(frame, (128, 128))

################################################################################
# Creating context and socket binding
################################################################################
context = zmq.Context()
socket = context.socket(zmq.REP)
socket.bind("tcp://*:5555")

################################################################################
# Starting hand tracking
################################################################################
hands = mp_hands.Hands(min_detection_confidence=0.5,
                       min_tracking_confidence=0.5)

################################################################################
# Starting webcam
################################################################################
cap = cv2.VideoCapture(1)

while True:
    ########################################
    # Stage 01 - Waiting
    print("Waiting for request ...")
    # Wait for the next request from the client.
    message = socket.recv_string()
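    # The snippet ends mid-loop; a REP socket must send exactly one reply per
    # request, so the remaining stages are sketched below under the assumption
    # that the client only needs the first hand's landmark coordinates.
    success, frame = cap.read()
    if not success:
        socket.send_string("")  # reply anyway to keep the REQ/REP lockstep
        continue
    results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    reply = ""
    if results.multi_hand_landmarks:
        landmarks = results.multi_hand_landmarks[0].landmark
        reply = ";".join(
            f"{lm.x:.4f},{lm.y:.4f},{lm.z:.4f}" for lm in landmarks)
    # Send the landmark string (possibly empty) back to the client.
    socket.send_string(reply)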
import cv2
import mediapipe as mp

cap = cv2.VideoCapture(1)
mpHands = mp.solutions.hands
hands = mpHands.Hands()
mpDraw = mp.solutions.drawing_utils

# The palm detection model works on the complete image and provides a cropped
# image of the hand; the hand landmark model then finds 21 landmarks on that
# crop.
while True:
    success, img = cap.read()
    # Convert BGR to RGB because MediaPipe Hands expects RGB input.
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = hands.process(imgRGB)
    if results.multi_hand_landmarks:
        for handLms in results.multi_hand_landmarks:
            # print(handLms)
            print(mpHands.HAND_CONNECTIONS)
            mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)
    if results.multi_handedness:
        print(results.multi_handedness)
    cv2.imshow("Image", img)
    cv2.waitKey(1)
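# The landmark values above are normalized to [0, 1]; a short sketch of
# converting one landmark to pixel coordinates follows (it assumes img and
# handLms from the per-hand loop above are in scope). Landmark 8 is the index
# fingertip in the 21-point hand model:
h, w, _ = img.shape
tip = handLms.landmark[mpHands.HandLandmark.INDEX_FINGER_TIP]
cx, cy = int(tip.x * w), int(tip.y * h)
cv2.circle(img, (cx, cy), 8, (255, 0, 255), cv2.FILLED)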