Example No. 1
def analyze_camera(model, framework, resolution, lite):
    """
    Live prediction of pose coordinates from camera.
    
    Args:
        model: deep learning model
            Initialized EfficientPose model to utilize (RT, I, II, III, IV, RT_Lite, I_Lite or II_Lite)
        framework: string
            Deep learning framework to use (Keras, TensorFlow, TensorFlow Lite or PyTorch)
        resolution: int
            Input height and width of model to utilize
        lite: boolean
            Whether an EfficientPose Lite model is used

    Returns:
        Predicted pose coordinates for every frame of the camera session.
    """

    # Open the default camera
    import time
    import cv2
    start_time = time.time()
    cap = cv2.VideoCapture(0)
    success, frame = cap.read()
    if not success:
        print('Camera could not be opened. Please verify that the device is working.')
        return False
    frame_height, frame_width = frame.shape[:2]
    coordinates = []
    while True:

        # Read frame
        success, frame = cap.read()
        if not success:
            break

        # Construct a single-frame batch, converting OpenCV's BGR to RGB
        batch = [frame[..., ::-1]]

        # Preprocess batch
        batch = helpers.preprocess(batch, resolution, lite)

        # Perform inference
        batch_outputs = infer(batch, model, lite, framework)

        # Extract coordinates for frame
        frame_coordinates = helpers.extract_coordinates(batch_outputs[0, ...],
                                                        frame_height,
                                                        frame_width,
                                                        real_time=True)
        coordinates.append(frame_coordinates)

        # Draw and display predictions
        helpers.display_camera(cv2, frame, frame_coordinates, frame_height,
                               frame_width)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

    # Print total operation time
    print('Camera session processed in {0} seconds'.format(
        '%.3f' % (time.time() - start_time)))

    return coordinates
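A minimal usage sketch for analyze_camera, assuming a Keras model loaded as in the final snippet on this page; the framework string and the resolution value of 368 are illustrative assumptions, since the input size must match the chosen model variant.

# Hypothetical invocation: press 'q' in the display window to end the session
coordinates = analyze_camera(model, framework='keras', resolution=368, lite=False)
print('Predicted poses for {0} frames'.format(len(coordinates)))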
Example No. 2
def analyze_image(file_path, model, framework, resolution, lite):
    """
    Predict pose coordinates on supplied image.
    
    Args:
        file_path: path
            System path of image to analyze
        model: deep learning model
            Initialized EfficientPose model to utilize (RT, I, II, III, IV, RT_Lite, I_Lite or II_Lite)
        framework: string
            Deep learning framework to use (Keras, TensorFlow, TensorFlow Lite or PyTorch)
        resolution: int
            Input height and width of model to utilize
        lite: boolean
            Whether an EfficientPose Lite model is used

    Returns:
        Predicted pose coordinates for the supplied image.
    """

    # Load image
    import time
    import numpy as np
    from PIL import Image
    start_time = time.time()
    image = np.array(Image.open(file_path))
    image_height, image_width = image.shape[:2]
    batch = np.expand_dims(image, axis=0)

    # Preprocess batch
    batch = helpers.preprocess(batch, resolution, lite)

    # Perform inference
    batch_outputs = infer(batch, model, lite, framework)

    # Extract coordinates
    coordinates = [
        helpers.extract_coordinates(batch_outputs[0, ...], image_height,
                                    image_width)
    ]

    # Print processing time
    print(
        '\n##########################################################################################################'
    )
    print('Image processed in {0} seconds'.format('%.3f' %
                                                  (time.time() - start_time)))
    print(
        '##########################################################################################################\n'
    )

    return coordinates
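A corresponding sketch for single-image analysis, reusing the golf image referenced at the bottom of this page (framework string and resolution are assumptions):

# Hypothetical invocation on a single image
coordinates = analyze_image('./utils/golf.jpeg', model, framework='keras',
                            resolution=368, lite=False)
frame_coordinates = coordinates[0]  # one entry per analyzed image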
Example No. 3
def analyze_video(file_path, model, framework, resolution, lite):
    """
    Predict pose coordinates on supplied video.
    
    Args:
        file_path: path
            System path of video to analyze
        model: deep learning model
            Initialized EfficientPose model to utilize (RT, I, II, III, IV, RT_Lite, I_Lite or II_Lite)
        framework: string
            Deep learning framework to use (Keras, TensorFlow, TensorFlow Lite or PyTorch)
        resolution: int
            Input height and width of model to utilize
        lite: boolean
            Whether an EfficientPose Lite model is used

    Returns:
        Predicted pose coordinates for every frame of the supplied video.
    """

    # Define batch size and number of batches in each part
    # (TensorFlow Lite interpreters are typically restricted to single-frame batches)
    batch_size = 1 if framework in ['tensorflowlite', 'tflite'] else 49
    part_size = 490 if framework in ['tensorflowlite', 'tflite'] else 10

    # Load video
    import time
    import numpy as np
    from skvideo.io import vreader, ffprobe
    start_time = time.time()
    try:
        videogen = vreader(file_path)
        video_metadata = ffprobe(file_path)['video']
        num_video_frames = int(video_metadata['@nb_frames'])
        num_batches = int(np.ceil(num_video_frames / batch_size))
        frame_height, frame_width = next(vreader(file_path)).shape[:2]
    except Exception:
        print(
            '\n##########################################################################################################'
        )
        print(
            'Video "{0}" could not be loaded. Please verify that the file is working.'
            .format(file_path))
        print(
            '##########################################################################################################\n'
        )
        return False

    # Operate on batches
    coordinates = []
    batch_num = 1
    part_start_time = time.time()
    print(
        '\n##########################################################################################################'
    )
    while True:

        # Fetch batch of frames
        batch = [next(videogen, None) for _ in range(batch_size)]
        if not isinstance(batch[0], np.ndarray):
            break
        elif not isinstance(batch[-1], np.ndarray):
            # Pad the final, partial batch with black frames so it keeps the
            # fixed batch size; the padding is trimmed off on return
            batch = [
                frame if isinstance(frame, np.ndarray) else np.zeros(
                    (frame_height, frame_width, 3)) for frame in batch
            ]

        # Preprocess batch
        batch = helpers.preprocess(batch, resolution, lite)

        # Perform inference
        batch_outputs = infer(batch, model, lite, framework)

        # Extract coordinates for batch
        batch_coordinates = [
            helpers.extract_coordinates(batch_outputs[n, ...], frame_height,
                                        frame_width) for n in range(batch_size)
        ]
        coordinates += batch_coordinates

        # Print partial processing time
        if batch_num % part_size == 0:
            print(
                '{0} of {1}: Part processed in {2} seconds | Video processed for {3} seconds'
                .format(int(batch_num / part_size),
                        int(np.ceil(num_batches / part_size)),
                        '%.3f' % (time.time() - part_start_time),
                        '%.3f' % (time.time() - start_time)))
            part_start_time = time.time()
        batch_num += 1

    # Print total processing time
    print('{0} of {0}: Video processed in {1} seconds'.format(
        int(np.ceil(num_batches / part_size)),
        '%.3f' % (time.time() - start_time)))
    print(
        '##########################################################################################################\n'
    )

    return coordinates[:num_video_frames]
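Because analyze_video returns False when the file cannot be read, callers should check the result before use. A minimal sketch, with a hypothetical video path:

# Hypothetical invocation on a video file
coordinates = analyze_video('./utils/video.mp4', model, framework='keras',
                            resolution=368, lite=False)
if coordinates:
    print('Predicted poses for {0} frames'.format(len(coordinates)))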
# Load the Keras model; model_variant (e.g. 'rt' or 'ii'), utils.helpers and
# annotate_image are assumed to be defined in the surrounding module
import os
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.backend import set_learning_phase

set_learning_phase(0)
model = load_model(os.path.join(
    'models', 'keras', 'EfficientPose{0}.h5'.format(model_variant.upper())),
                   custom_objects={
                       'BilinearWeights': helpers.keras_BilinearWeights,
                       'Swish': helpers.Swish(helpers.eswish),
                       'eswish': helpers.eswish,
                       'swish1': helpers.swish1
                   })

# file_path = './utils/MPII.jpg'
file_path = './utils/golf.jpeg'
img = cv2.imread(file_path)
h, w = img.shape[:2]
# Convert BGR to RGB and normalize to [0, 1] before adding a batch dimension
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
img /= 255.
img = img[np.newaxis, ...]

# Keep the final output of the multi-output Keras model
output = model.predict(img)[-1]
# output = output[0]
# output = np.sum(output, axis=-1)
# output = output[..., np.newaxis]
# output = np.repeat(output, 3, axis=-1)

coord = [helpers.extract_coordinates(output[0, ...], h, w)]
annotate_image(file_path, coord)

# plt.imshow(output, cmap='hot')
# plt.show()
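The commented-out lines above sketch a heatmap visualization of the raw network output. A runnable version of that idea, assuming output has shape (1, height, width, num_body_parts) with one confidence map per channel:

import matplotlib.pyplot as plt

# Collapse the per-body-part confidence maps into one 2D heatmap
# (assumes the last axis of 'output' indexes body parts)
heatmap = np.sum(output[0], axis=-1)
plt.imshow(heatmap, cmap='hot')
plt.show()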