Example #1
 def test_blank_image(self):
     pose = mp_pose.Pose()
     image = np.zeros([100, 100, 3], dtype=np.uint8)
     image.fill(255)
     results = pose.process(image)
     self.assertIsNone(results.pose_landmarks)
     pose.close()
Example #2
import cv2
from mediapipe.python.solutions import drawing_utils, pose


def draw_pose_video(filename=0):
    cap = cv2.VideoCapture(filename)
    with pose.Pose(
        min_detection_confidence=0.5, min_tracking_confidence=0.5,
    ) as extracter:

        while cap.isOpened():
            success, image = cap.read()
            if not success:
                # If loading a video file, use 'break' instead of 'continue'.
                print("Ignoring empty camera frame.")
                continue

            # Flip horizontally for a selfie view and convert BGR to RGB for MediaPipe.
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            # Mark the image as not writeable to pass by reference and improve performance.
            image.flags.writeable = False
            results = extracter.process(image)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            drawing_utils.draw_landmarks(
                image=image,
                landmark_list=results.pose_landmarks,
                connections=pose.POSE_CONNECTIONS,
            )
            cv2.imshow("MediaPipe Pose", image)

            if cv2.waitKey(5) & 0xFF == 27:
                break
    cap.release()
    cv2.destroyAllWindows()
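Usage sketch (the file name is only an illustration): since `filename` defaults to 0, the same function handles a webcam or a video file; press Esc (key code 27) to stop.

    draw_pose_video()              # default webcam (device index 0)
    draw_pose_video("squats.mp4")  # hypothetical video file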
Example #3
    def test_on_image(self, static_image_mode, model_complexity, num_frames):
        image_path = os.path.join(os.path.dirname(__file__),
                                  'testdata/pose.jpg')
        expected_segmentation_path = os.path.join(
            os.path.dirname(__file__), 'testdata/pose_segmentation.png')
        image = cv2.imread(image_path)
        expected_segmentation = self._rgb_to_segmentation(
            Image.open(expected_segmentation_path).convert('RGB'))

        with mp_pose.Pose(static_image_mode=static_image_mode,
                          model_complexity=model_complexity,
                          enable_segmentation=True) as pose:
            for idx in range(num_frames):
                results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                segmentation = results.segmentation_mask.round().astype(
                    np.uint8)

                # TODO: Add rendering of world 3D when supported.
                self._annotate(image.copy(), results, idx)
                self._annotate_segmentation(segmentation,
                                            expected_segmentation, idx)

                self._assert_diff_less(
                    self._landmarks_list_to_array(results.pose_landmarks,
                                                  image.shape)[:, :2],
                    EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
                self._assert_diff_less(
                    self._world_landmarks_list_to_array(
                        results.pose_world_landmarks),
                    EXPECTED_POSE_WORLD_LANDMARKS, WORLD_DIFF_THRESHOLD)
                self.assertGreaterEqual(
                    self._segmentation_iou(expected_segmentation,
                                           segmentation), IOU_THRESHOLD)
Example #4
 def test_blank_image(self):
     with mp_pose.Pose(enable_segmentation=True) as pose:
         image = np.zeros([100, 100, 3], dtype=np.uint8)
         image.fill(255)
         results = pose.process(image)
         self.assertIsNone(results.pose_landmarks)
         self.assertIsNone(results.segmentation_mask)
Example #5
  def test_full_body_model(self, static_image_mode, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
    image = cv2.imread(image_path)

    with mp_pose.Pose(static_image_mode=static_image_mode) as pose:
      for _ in range(num_frames):
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._assert_diff_less(
            self._landmarks_list_to_array(results.pose_landmarks, image.shape),
            EXPECTED_FULL_BODY_LANDMARKS,
            DIFF_THRESHOLD)
Example #6
    def test_full_body_model(self, static_image_mode, num_frames):
        image_path = os.path.join(os.path.dirname(__file__),
                                  'testdata/pose.jpg')
        pose = mp_pose.Pose(static_image_mode=static_image_mode)
        image = cv2.imread(image_path)

        for _ in range(num_frames):
            results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            self._verify_output_landmarks(results.pose_landmarks, image.shape,
                                          33)
        pose.close()
Example #7
  def test_on_video(self, upper_body_only, expected_name):
    """Tests pose models on a video."""
    # If set to `True`, actual predictions are dumped to .npz and JSON files.
    dump_predictions = False

    # Set threshold for comparing actual and expected predictions in pixels.
    diff_threshold = 50

    video_path = os.path.join(os.path.dirname(__file__),
                              'testdata/pose_squats.mp4')
    expected_path = os.path.join(os.path.dirname(__file__),
                                 'testdata/{}'.format(expected_name))

    # Predict pose landmarks for each frame.
    video_cap = cv2.VideoCapture(video_path)
    actual_per_frame = []
    with mp_pose.Pose(
        static_image_mode=False, upper_body_only=upper_body_only) as pose:
      while True:
        # Get next frame of the video.
        success, input_frame = video_cap.read()
        if not success:
          break

        # Run pose tracker.
        input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
        result = pose.process(image=input_frame)
        pose_landmarks = self._landmarks_list_to_array(result.pose_landmarks,
                                                       input_frame.shape)

        actual_per_frame.append(pose_landmarks)
    actual = np.asarray(actual_per_frame)

    if dump_predictions:
      # Dump .npz
      with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        np.savez(tmp_file, predictions=np.array(actual))
        print('Predictions saved as .npz to {}'.format(tmp_file.name))

      # Dump JSON
      with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        with open(tmp_file.name, 'w') as fl:
          dump_data = {'predictions': np.around(actual, 3).tolist()}
          fl.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))
          print('Predictions saved as JSON to {}'.format(tmp_file.name))

    # Validate actual vs. expected predictions.
    expected = np.load(expected_path)['predictions']
    assert actual.shape == expected.shape, (
        'Unexpected shape of predictions: {} instead of {}'.format(
            actual.shape, expected.shape))
    self._assert_diff_less(
        actual[..., :2], expected[..., :2], threshold=diff_threshold)
Example #8
import os

import cv2
from mediapipe.python.solutions import drawing_utils, pose


def draw_pose_img(filename: str):
    with pose.Pose(static_image_mode=True, min_detection_confidence=0.5) as extracter:
        image = cv2.imread(filename)
        results = extracter.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        img_annot = image.copy()
        drawing_utils.draw_landmarks(
            image=img_annot,
            landmark_list=results.pose_landmarks,
            connections=pose.POSE_CONNECTIONS,
        )

        # Write the annotated copy next to the input, e.g. foo.jpg -> foo-pose.png.
        cv2.imwrite(os.path.splitext(filename)[0] + "-pose.png", img_annot)
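Usage sketch (the image name is only an illustration):

    draw_pose_img("pose.jpg")  # writes the annotated copy as pose-pose.png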
Example #9
 def test_on_image(self, static_image_mode, model_complexity, num_frames):
     image_path = os.path.join(os.path.dirname(__file__),
                               'testdata/pose.jpg')
     image = cv2.imread(image_path)
     with mp_pose.Pose(static_image_mode=static_image_mode,
                       model_complexity=model_complexity) as pose:
         for idx in range(num_frames):
             results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
             self._annotate(image.copy(), results, idx)
             self._assert_diff_less(
                 self._landmarks_list_to_array(results.pose_landmarks,
                                               image.shape)[:, :2],
                 EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
Example #10
import cv2
import numpy as np
from mediapipe.python.solutions import pose


def get_skeleton_by_frame(filename: str):
    cap = cv2.VideoCapture(filename)
    with pose.Pose(
        min_detection_confidence=0.5, min_tracking_confidence=0.5,
    ) as extracter:
        ls_frame = []
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                break
            results = extracter.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if results.pose_landmarks is None:
                # Skip frames without a detected pose to avoid an AttributeError.
                continue
            ls_frame.append(_get_skeleton(results.pose_landmarks.landmark))
    cap.release()

    # Stack per-frame (num_joints, 3) skeletons and put the (x, y, z) channels
    # first: final shape is (3, num_frames, num_joints, 1).
    return np.expand_dims(np.array(ls_frame).transpose(2, 0, 1), axis=-1)
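The `_get_skeleton` helper is not shown in this example. A minimal sketch, assuming it simply stacks each landmark's normalized (x, y, z) into a (num_joints, 3) array (the real helper may select or reorder a subset of joints):

    import numpy as np

    def _get_skeleton(landmarks):
        # `landmarks` is the `.landmark` list from `results.pose_landmarks`.
        return np.array([[lmk.x, lmk.y, lmk.z] for lmk in landmarks],
                        dtype=np.float32)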
Example #11
    def bootstrap(self, per_pose_class_limit=None):
        """Bootstraps images in a given folder.

        Required layout of the input images folder (the output images folder
        uses the same layout):
          pushups_up/
            image_001.jpg
            image_002.jpg
            ...
          pushups_down/
            image_001.jpg
            image_002.jpg
            ...
          ...

        Produced CSVs in the output folder:
          pushups_up.csv
          pushups_down.csv

        Produced CSV structure with pose 3D landmarks:
          sample_00001,x1,y1,z1,x2,y2,z2,....
          sample_00002,x1,y1,z1,x2,y2,z2,....
        """
        # Create output folder for CSVs.
        if not os.path.exists(self._csvs_out_folder):
            os.makedirs(self._csvs_out_folder)

        for pose_class_name in self._pose_class_names:
            print('Bootstrapping ', pose_class_name, file=sys.stderr)

            # Paths for the pose class.
            images_in_folder = os.path.join(self._images_in_folder,
                                            pose_class_name)
            images_out_folder = os.path.join(self._images_out_folder,
                                             pose_class_name)
            csv_out_path = os.path.join(self._csvs_out_folder,
                                        pose_class_name + '.csv')
            if not os.path.exists(images_out_folder):
                os.makedirs(images_out_folder)

            with open(csv_out_path, 'w') as csv_out_file:
                csv_out_writer = csv.writer(csv_out_file,
                                            delimiter=',',
                                            quoting=csv.QUOTE_MINIMAL)
                # Get list of images.
                image_names = sorted([
                    n for n in os.listdir(images_in_folder)
                    if not n.startswith('.')
                ])
                if per_pose_class_limit is not None:
                    image_names = image_names[:per_pose_class_limit]

                # Bootstrap every image.
                for image_name in tqdm.tqdm(image_names):
                    # Load image.
                    input_frame = cv2.imread(
                        os.path.join(images_in_folder, image_name))
                    input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)

                    # Initialize fresh pose tracker and run it.
                    with mp_pose.Pose(upper_body_only=False) as pose_tracker:
                        result = pose_tracker.process(image=input_frame)
                        pose_landmarks = result.pose_landmarks

                    # Save image with pose prediction (if pose was detected).
                    output_frame = input_frame.copy()
                    if pose_landmarks is not None:
                        mp_drawing.draw_landmarks(
                            image=output_frame,
                            landmark_list=pose_landmarks,
                            connections=mp_pose.POSE_CONNECTIONS)
                    output_frame = cv2.cvtColor(output_frame,
                                                cv2.COLOR_RGB2BGR)
                    cv2.imwrite(os.path.join(images_out_folder, image_name),
                                output_frame)

                    # Save landmarks if pose was detected.
                    if pose_landmarks is not None:
                        # Get landmarks.
                        frame_height, frame_width = output_frame.shape[:2]
                        pose_landmarks = np.array([[
                            lmk.x * frame_width, lmk.y * frame_height,
                            lmk.z * frame_width
                        ] for lmk in pose_landmarks.landmark],
                                                  dtype=np.float32)
                        assert pose_landmarks.shape == (
                            33, 3), 'Unexpected landmarks shape: {}'.format(
                                pose_landmarks.shape)
                        csv_out_writer.writerow(
                            [image_name] +
                            pose_landmarks.flatten().astype(str).tolist())

                    # Draw XZ projection and concatenate with the image.
                    projection_xz = self._draw_xz_projection(
                        output_frame=output_frame,
                        pose_landmarks=pose_landmarks)
                    output_frame = np.concatenate(
                        (output_frame, projection_xz), axis=1)
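The CSV layout documented in the docstring above (sample name followed by 33 flattened x, y, z triples) can be read back into per-sample landmark arrays; a minimal sketch, assuming that layout and a hypothetical helper name:

    import csv

    import numpy as np

    def load_pose_samples(csv_path):
        samples = {}
        with open(csv_path, newline='') as csv_file:
            for row in csv.reader(csv_file):
                sample_name, values = row[0], row[1:]
                samples[sample_name] = np.array(values, dtype=np.float32).reshape(33, 3)
        return samples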
Example #12
    # Get some video parameters to generate output video with classification.
    video_n_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)
    video_fps = video_cap.get(cv2.CAP_PROP_FPS)
    video_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Initialize tracker, classifier and counter.
    # Do that before every video as all of them have state.
    from mediapipe.python.solutions import pose as mp_pose

    # Folder with pose class CSVs. This should be the same folder you used
    # while building the classifier to output the CSVs.
    pose_samples_folder = 'fitness_poses_csvs_out'

    # Initialize tracker.
    pose_tracker = mp_pose.Pose(upper_body_only=False)

    # Initialize embedder.
    pose_embedder = FullBodyPoseEmbedder()

    # Initialize classifier.
    # Check that you are using the same parameters as during bootstrapping.
    pose_classifier = PoseClassifier(pose_samples_folder=pose_samples_folder,
                                     pose_embedder=pose_embedder,
                                     top_n_by_max_distance=30,
                                     top_n_by_mean_distance=10)

    # # Uncomment to validate target poses used by classifier and find outliers.
    # outliers = pose_classifier.find_pose_sample_outliers()
    # print('Number of pose sample outliers (consider removing them): ', len(outliers))
Example #13
 def test_invalid_image_shape(self):
   with mp_pose.Pose() as pose:
     with self.assertRaisesRegex(
         ValueError, 'Input image must contain three channel rgb data.'):
       pose.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
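The error above is raised for any input that is not 3-channel RGB; a quick fix, assuming hypothetical `bgra_image` / `gray_image` arrays, is to convert before calling `process`:

    rgb = cv2.cvtColor(bgra_image, cv2.COLOR_BGRA2RGB)  # 4 channels -> 3 channels
    rgb = cv2.cvtColor(gray_image, cv2.COLOR_GRAY2RGB)  # 1 channel  -> 3 channels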
Example #14
def classify(filename):

    # Specify your video name and target pose class to count the repetitions.
    video_path = filename
    class_names = ['pushups_down', 'lunges_down', 'squats_down']
    pose_samples_folders = [
        'fitness_poses_csvs_out_pushups', 'fitness_poses_csvs_out_lunges',
        'fitness_poses_csvs_out_squats'
    ]
    all_repetition_counts = []
    # out_video_path = f'{filename[:-3]}mov'

    # Open the video.

    for i in range(len(class_names)):
        class_name = class_names[i]

        video_cap = cv2.VideoCapture(video_path)

        # Get some video parameters to generate output video with classification.
        video_n_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # video_fps = video_cap.get(cv2.CAP_PROP_FPS)
        # video_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        # video_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Initialize tracker, classifier and counter.
        # Do that before every video as all of them have state.

        # Folder with pose class CSVs. This should be the same folder you used
        # while building the classifier to output the CSVs.
        pose_samples_folder = pose_samples_folders[i]

        # Initialize tracker.
        pose_tracker = mp_pose.Pose(upper_body_only=False)

        # Initialize embedder.
        pose_embedder = FullBodyPoseEmbedder()

        # Initialize classifier.
        # Check that you are using the same parameters as during bootstrapping.
        pose_classifier = PoseClassifier(
            pose_samples_folder=pose_samples_folder,
            pose_embedder=pose_embedder,
            top_n_by_max_distance=30,
            top_n_by_mean_distance=10)

        # # Uncomment to validate target poses used by classifier and find outliers.
        # outliers = pose_classifier.find_pose_sample_outliers()
        # print('Number of pose sample outliers (consider removing them): ', len(outliers))

        # Initialize EMA smoothing.
        pose_classification_filter = EMADictSmoothing(window_size=10,
                                                      alpha=0.2)

        # Initialize counter.
        repetition_counter = RepetitionCounter(class_name=class_name,
                                               enter_threshold=6,
                                               exit_threshold=4)

        # Initialize renderer.
        # pose_classification_visualizer = PoseClassificationVisualizer(
        #     class_name=class_name,
        #     plot_x_max=video_n_frames,
        #     # Graphic looks nicer if it's the same as `top_n_by_mean_distance`.
        #     plot_y_max=10)

        # Run classification on a video.

        # Open output video.
        # out_video = cv2.VideoWriter(out_video_path, cv2.VideoWriter_fourcc(*'mp4v'), video_fps, (video_width, video_height))

        frame_idx = 0
        repetitions_count = -1
        # output_frame = None
        with tqdm.tqdm(total=video_n_frames, position=0, leave=True) as pbar:
            while True:
                # Get next frame of the video.
                success, input_frame = video_cap.read()
                if not success:
                    break

                # Run pose tracker.
                input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
                result = pose_tracker.process(image=input_frame)
                pose_landmarks = result.pose_landmarks

                # Draw pose prediction.
                output_frame = input_frame.copy()
                if pose_landmarks is not None:
                    mp_drawing.draw_landmarks(
                        image=output_frame,
                        landmark_list=pose_landmarks,
                        connections=mp_pose.POSE_CONNECTIONS)

                if pose_landmarks is not None:
                    # Get landmarks.
                    frame_height, frame_width = output_frame.shape[:2]
                    pose_landmarks = np.array([[
                        lmk.x * frame_width, lmk.y * frame_height,
                        lmk.z * frame_width
                    ] for lmk in pose_landmarks.landmark],
                                              dtype=np.float32)
                    assert pose_landmarks.shape == (
                        33, 3), 'Unexpected landmarks shape: {}'.format(
                            pose_landmarks.shape)

                    # Classify the pose on the current frame.
                    pose_classification = pose_classifier(pose_landmarks)

                    # Smooth classification using EMA.
                    pose_classification_filtered = pose_classification_filter(
                        pose_classification)

                    # Count repetitions.
                    repetitions_count = repetition_counter(
                        pose_classification_filtered)
                else:
                    # No pose => no classification on current frame.
                    # pose_classification = None

                    # Still add empty classification to the filter to maintain correct
                    # smoothing for future frames.
                    # pose_classification_filtered = pose_classification_filter(dict())
                    # pose_classification_filtered = None

                    # Don't update the counter presuming that person is 'frozen'. Just
                    # take the latest repetitions count.
                    repetitions_count = repetition_counter.n_repeats

                # Draw classification plot and repetition counter.
                # output_frame = pose_classification_visualizer(
                #     frame=output_frame,
                #     pose_classification=pose_classification,
                #     pose_classification_filtered=pose_classification_filtered,
                #     repetitions_count=repetitions_count)

                # Save the output frame.
                # out_video.write(cv2.cvtColor(np.array(output_frame), cv2.COLOR_RGB2BGR))

                # Show intermediate frames of the video to track progress.
                # if frame_idx % 50 == 0:
                #   show_image(output_frame)

                frame_idx += 1
                pbar.update()
        all_repetition_counts.append(repetitions_count)
        pose_tracker.close()

    # Close output video.
    # out_video.release()

    # Report per-exercise repetition counts.
    print(all_repetition_counts)
    pushup_counts, lunge_counts, squat_counts = all_repetition_counts
    return pushup_counts, lunge_counts, squat_counts
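Usage sketch (the video name is only an illustration):

    pushups, lunges, squats = classify("workout.mp4")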
Example #15
    def test_on_video(self, model_complexity, expected_name):
        """Tests pose models on a video."""
        # Set threshold for comparing actual and expected predictions in pixels.
        diff_threshold = 15
        world_diff_threshold = 0.1

        video_path = os.path.join(os.path.dirname(__file__),
                                  'testdata/pose_squats.mp4')
        expected_path = os.path.join(os.path.dirname(__file__),
                                     'testdata/{}'.format(expected_name))

        # Predict pose landmarks for each frame.
        video_cap = cv2.VideoCapture(video_path)
        actual_per_frame = []
        actual_world_per_frame = []
        frame_idx = 0
        with mp_pose.Pose(static_image_mode=False,
                          model_complexity=model_complexity) as pose:
            while True:
                # Get next frame of the video.
                success, input_frame = video_cap.read()
                if not success:
                    break

                # Run pose tracker.
                input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
                result = pose.process(image=input_frame)
                pose_landmarks = self._landmarks_list_to_array(
                    result.pose_landmarks, input_frame.shape)
                pose_world_landmarks = self._world_landmarks_list_to_array(
                    result.pose_world_landmarks)

                actual_per_frame.append(pose_landmarks)
                actual_world_per_frame.append(pose_world_landmarks)

                input_frame = cv2.cvtColor(input_frame, cv2.COLOR_RGB2BGR)
                self._annotate(input_frame, result, frame_idx)
                frame_idx += 1
        actual = np.array(actual_per_frame)
        actual_world = np.array(actual_world_per_frame)

        # Dump actual .npz.
        npz_path = self._get_output_path(expected_name)
        np.savez(npz_path, predictions=actual, predictions_world=actual_world)

        # Dump actual JSON.
        json_path = self._get_output_path(
            expected_name.replace('.npz', '.json'))
        with open(json_path, 'w') as fl:
            dump_data = {
                'predictions': np.around(actual, 3).tolist(),
                'predictions_world': np.around(actual_world, 3).tolist()
            }
            fl.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))

        # Validate actual vs. expected landmarks.
        expected = np.load(expected_path)['predictions']
        assert actual.shape == expected.shape, (
            'Unexpected shape of predictions: {} instead of {}'.format(
                actual.shape, expected.shape))
        self._assert_diff_less(actual[..., :2],
                               expected[..., :2],
                               threshold=diff_threshold)

        # Validate actual vs. expected world landmarks.
        expected_world = np.load(expected_path)['predictions_world']
        assert actual_world.shape == expected_world.shape, (
            'Unexpected shape of world predictions: {} instead of {}'.format(
                actual_world.shape, expected_world.shape))
        self._assert_diff_less(actual_world,
                               expected_world,
                               threshold=world_diff_threshold)