Пример #1
0
def process_image(im_path, bbox_param):
    """
    Processes an image, producing a crop of up to IMG_SIZE x IMG_SIZE.

    The image is read from im_path, mapped to [-1, 1] (assumes pixel values
    in [0, 255] -- TODO confirm against imread), resized by the bounding-box
    scale, edge-padded and then cropped around the bounding-box center.

    Args:
        im_path (str): Path of the image to read.
        bbox_param (3,): [cx, cy, scale].

    Returns:
        dict with keys:
            image: The cropped image.
            im_path: The input path, passed through unchanged.
            im_shape: [height, width] of the crop.
            center: Bounding-box center in crop coordinates.
            scale: The scale taken from bbox_param.
            start_pt: (x, y) origin of the crop in the padded, resized image.
    """
    image = imread(im_path)
    center = bbox_param[:2]
    scale = bbox_param[2]

    # Pre-process image to [-1, 1]
    image = ((image / 255.) - 0.5) * 2
    image_scaled, scale_factors = resize_img(image, scale)
    # The np.int alias was removed in NumPy 1.24; the builtin int selects
    # the same dtype.
    center_scaled = np.round(center * scale_factors).astype(int)

    # Make sure there is enough space to crop around the center: pad both
    # spatial axes by IMG_SIZE on each side, leave the channel axis alone.
    image_padded = np.pad(
        array=image_scaled,
        pad_width=((IMG_SIZE,), (IMG_SIZE,), (0,)),
        mode='edge'
    )
    height, width = image_padded.shape[:2]
    # Shift the center into the padded image's coordinates.
    center_scaled += IMG_SIZE

    # Crop IMG_SIZE x IMG_SIZE around the center.
    margin = IMG_SIZE // 2

    start_pt = (center_scaled - margin).astype(int)
    end_pt = (center_scaled + margin).astype(int)
    # Points are (x, y): index 0 is clamped by width, index 1 by height.
    end_pt[0] = min(end_pt[0], width)
    end_pt[1] = min(end_pt[1], height)
    image_scaled = image_padded[start_pt[1]:end_pt[1],
                                start_pt[0]:end_pt[0], :]
    # Express the center relative to the crop origin.
    center_scaled -= start_pt
    height, width = image_scaled.shape[:2]
    im_shape = [height, width]

    return {
        # return original too with info.
        'image': image_scaled,
        'im_path': im_path,
        'im_shape': im_shape,
        'center': center_scaled,
        'scale': scale,
        'start_pt': start_pt,
    }
Пример #2
0
def visualize_mesh_og(cam,
                      vert,
                      renderer,
                      start_pt,
                      scale,
                      proc_img_shape,
                      im_path=None,
                      img=None,
                      deg=0,
                      mesh_color='blue',
                      max_img_size=300,
                      pad=50,
                      crop_cam=None,
                      bbox=None):
    """
    Renders the mesh in the coordinate frame of the original image.

    Either img or im_path must be given; when img is None the image is read
    from im_path and mapped to [-1, 1]. Images whose longer side exceeds
    max_img_size are shrunk first (lower max_img_size if the renderer runs
    out of memory).

    When bbox is given, crop_cam must be given too: the image is cropped to
    img[bbox[0]:bbox[1], bbox[2]:bbox[3]] and crop_cam is passed to the
    renderer as is (see compute_video_bbox.py). Otherwise the camera is
    converted from the processed crop to the normalized original image.

    Note: the pad argument is accepted but not used by this function.

    Returns:
        Whatever renderer.rotated returns for the chosen camera.
    """
    use_crop = bbox is not None

    if img is None:
        # Downstream code expects pixel values in [-1, 1].
        img = ((imread(im_path) / 255.) - 0.5) * 2

    if use_crop:
        assert (crop_cam is not None)
        img = img[bbox[0]:bbox[1], bbox[2]:bbox[3]]
        # The crop camera is already adjusted, so neutralize these.
        scale = 1.
        start_pt = np.array([0, 0])

    longest_side = np.max(img.shape[:2])
    if longest_side > max_img_size:
        # Too large an image does not fit on the GPU for the renderer.
        shrink = max_img_size / float(longest_side)
        img, _ = resize_img(img, shrink)
        undo_scale = (1. / np.array(scale)) * shrink
    else:
        undo_scale = 1. / np.array(scale)

    # The renderer needs a square image.
    img, _ = make_square(img)
    img_size = np.max(img.shape[:2])
    renderer.renderer.image_size = img_size

    if use_crop:
        render_cam = crop_cam
    else:
        crop_side = proc_img_shape[0]

        # Camera in the processed crop's pixel coordinates.
        cam_crop = np.hstack([
            crop_side * cam[0] * 0.5,
            cam[1:] + (2. / cam[0]) * 0.5,
        ])

        # Camera in the original image's pixel coordinates.
        cam_orig = np.hstack([
            cam_crop[0] * undo_scale,
            cam_crop[1:] + (start_pt - crop_side) / cam_crop[0],
        ])

        # Camera in the normalized coordinates of the squared image.
        to_normalized = 2. / img_size
        render_cam = np.hstack([
            cam_orig[0] * to_normalized,
            cam_orig[1:] - (1 / (to_normalized * cam_orig[0])),
        ]).astype(np.float32)

    return renderer.rotated(
        verts=vert,
        deg=deg,
        cam=render_cam,
        color_name=mesh_color,
    )
Пример #3
0
def visualize_img_orig(cam,
                       kp_pred,
                       vert,
                       renderer,
                       start_pt,
                       scale,
                       proc_img_shape,
                       im_path=None,
                       img=None,
                       rotated_view=False,
                       mesh_color='blue',
                       max_img_size=300,
                       no_text=False,
                       bbox=None,
                       crop_cam=None):
    """
    Visualizes the predicted keypoints and mesh in the original image space
    (squared).

    If you get an out of memory error, make max_img_size smaller.

    Args:
        Either im_path or img must be supplied; when img is None the image
        is read from im_path and mapped to [-1, 1].
        start_pt, scale and proc_img_shape are the parameters that were used
        to preprocess the image.
        If bbox is given, crop_cam must be given as well; the image is
        cropped to img[bbox[0]:bbox[1], bbox[2]:bbox[3]] and crop_cam is
        used as the camera.

    Returns:
        Combined image, as produced by visualize_img.
    """
    if img is None:
        # Downstream code expects pixel values in [-1, 1].
        img = ((imread(im_path) / 255.) - 0.5) * 2

    longest_side = np.max(img.shape[:2])
    if longest_side > max_img_size:
        # Too large an image does not fit on the GPU for the renderer.
        shrink = max_img_size / float(longest_side)
        img, _ = resize_img(img, shrink)
        undo_scale = (1. / np.array(scale)) * shrink
    else:
        undo_scale = 1. / np.array(scale)

    use_crop = bbox is not None
    if use_crop:
        assert (crop_cam is not None)
        # NOTE(review): the crop is applied after the optional resize here,
        # whereas visualize_mesh_og crops first -- confirm this is intended.
        img = img[bbox[0]:bbox[1], bbox[2]:bbox[3]]
        # The crop camera is already adjusted.
        start_pt = np.array([0, 0])

    # The renderer needs a square image.
    img, pad_vals = make_square(img)
    img_size = np.max(img.shape[:2])
    renderer.renderer.image_size = img_size

    crop_side = proc_img_shape[0]

    # Keypoints: normalized crop coords -> crop pixels -> original pixels
    # -> normalized coords of the squared original image.
    joints_crop = ((kp_pred + 1) * 0.5) * crop_side
    joints_orig = (joints_crop + start_pt - crop_side) * undo_scale
    kp_orig = 2 * (joints_orig / img_size) - 1

    if use_crop:
        use_cam = crop_cam
    else:
        # Camera in the processed crop's pixel coordinates.
        cam_crop = np.hstack([
            crop_side * cam[0] * 0.5,
            cam[1:] + (2. / cam[0]) * 0.5,
        ])

        # Camera in the original image's pixel coordinates.
        cam_orig = np.hstack([
            cam_crop[0] * undo_scale,
            cam_crop[1:] + (start_pt - crop_side) / cam_crop[0],
        ])

        # Camera in the normalized coordinates of the squared image.
        to_normalized = 2. / img_size
        use_cam = np.hstack([
            cam_orig[0] * to_normalized,
            cam_orig[1:] - (1 / (to_normalized * cam_orig[0])),
        ]).astype(np.float32)

    # Delegate the actual drawing to visualize_img with the chosen camera.
    return visualize_img(
        img=img,
        cam=use_cam,
        kp_pred=kp_orig,
        vert=vert,
        renderer=renderer,
        rotated_view=rotated_view,
        mesh_color=mesh_color,
        pad_vals=pad_vals,
        no_text=no_text,
    )
Пример #4
0
def process_image(im_path, gt2d, coder, bbox_param, DRAW=False):
    """
    Reads a JPEG, rescales it and crops up to 300x300 around the bbox center.

    Args:
        im_path (str): Path of the JPEG file to read.
        gt2d (Nx3): Per-joint [x, y, visibility]; a joint counts as visible
            when its third column is > 0.
        coder: Object providing decode_jpeg and encode_jpeg.
        bbox_param (3,): [cx, cy, scale].
        DRAW (bool): If True, plot the original and cropped images with
            their skeletons and drop into an ipdb breakpoint (debug aid).

    Returns:
        dict with keys:
            image_data: The crop encoded by coder.encode_jpeg.
            image: The cropped image array.
            image_shape: [height, width] of the crop.
            label: 3xN array, rows are x, y (crop coordinates) and
                visibility.
            center: Bounding-box center in crop coordinates.
            scale_factors: Scale factors returned by resize_img.
            start_pt: (x, y) origin of the crop in the padded, resized image.
    """
    with tf.gfile.FastGFile(im_path, 'rb') as f:
        image_data = f.read()
    image = coder.decode_jpeg(image_data)
    assert image.shape[2] == 3, \
        '{} has {} channels.'.format(im_path, image.shape[2])

    center = bbox_param[:2]
    scale = bbox_param[2]

    # estimate height..
    # Using vis_threshold 0 for DT
    vis = gt2d[:, 2] > 0.

    image_scaled, scale_factors = resize_img(image, scale)
    joints_scaled = np.copy(gt2d[:, :2])
    joints_scaled[:, 0] *= scale_factors[0]
    joints_scaled[:, 1] *= scale_factors[1]
    # The np.int alias was removed in NumPy 1.24; the builtin int selects
    # the same dtype.
    center_scaled = np.round(center * scale_factors).astype(int)

    # Make sure there is enough space to crop 300x300.
    image_padded = np.pad(image_scaled, ((300, ), (300, ), (0, )), 'edge')
    height, width = image_padded.shape[:2]
    # Shift center and joints into the padded image's coordinates.
    center_scaled += 300
    joints_scaled += 300

    # Crop 300x300 around the center.
    margin = 150

    start_pt = (center_scaled - margin).astype(int)
    end_pt = (center_scaled + margin).astype(int)
    # Points are (x, y): index 0 is clamped by width, index 1 by height.
    end_pt[0] = min(end_pt[0], width)
    end_pt[1] = min(end_pt[1], height)
    image_scaled = image_padded[start_pt[1]:end_pt[1],
                                start_pt[0]:end_pt[0], :]
    # Update others too.
    joints_scaled[:, 0] -= start_pt[0]
    joints_scaled[:, 1] -= start_pt[1]
    center_scaled -= start_pt
    height, width = image_scaled.shape[:2]
    im_shape = [height, width]

    # DRAW:
    if DRAW:
        import matplotlib.pyplot as plt
        plt.ion()
        plt.clf()
        fig = plt.figure(1)
        ax = fig.add_subplot(121)
        image_with_skel = draw_skeleton(image, gt2d[:, :2], vis=vis)
        ax.imshow(image_with_skel)
        ax.axis('off')
        ax.scatter(center[0], center[1], color='red')
        ax = fig.add_subplot(122)
        image_with_skel_scaled = draw_skeleton(
            image_scaled, joints_scaled[:, :2], vis=vis)
        ax.imshow(image_with_skel_scaled)
        ax.scatter(center_scaled[0], center_scaled[1], color='red')

        # Deliberate debugging breakpoint so the figure can be inspected.
        import ipdb
        ipdb.set_trace()
    # Encode image.
    image_data_scaled = coder.encode_jpeg(image_scaled)
    label = np.vstack([joints_scaled.T, vis])

    return {
        'image_data': image_data_scaled,
        'image': image_scaled,
        'image_shape': im_shape,
        'label': label,
        'center': center_scaled,
        'scale_factors': scale_factors,
        'start_pt': start_pt,
    }
Пример #5
0
def process_image(im_path,
                  gt2d,
                  coder,
                  bbox_param,
                  visualize=False,
                  vis_thresh=0.1,
                  img_size=224):
    """
    Processes an image, producing a crop of up to img_size x img_size.

    Args:
        im_path (str): Path of the JPEG file to read.
        gt2d (Nx3): Per-joint [x, y, visibility score]; required (it is
            indexed unconditionally). The original doc states 19x3.
        coder (tf.ImageCoder): Provides decode_jpeg and encode_jpeg.
        bbox_param (3,): [cx, cy, scale].
        visualize (bool): If True, plot the original and cropped images
            with their skeletons.
        vis_thresh (float): A joint is visible when its score exceeds this.
        img_size (int): Side length of the crop and of the edge padding.

    Returns:
        dict with keys:
            image_data_scaled: The crop encoded by coder.encode_jpeg.
            im_path: The input path, passed through unchanged.
            im_shape: [height, width] of the crop.
            kps: 3xN array, rows are x, y (crop coordinates) and visibility.
            center: Bounding-box center in crop coordinates.
            scale: The scale taken from bbox_param.
            start_pt: (x, y) origin of the crop in the padded, resized image.
    """
    with tf.gfile.FastGFile(im_path, 'rb') as f:
        image_data = f.read()
        image = coder.decode_jpeg(image_data)
        assert image.shape[2] == 3

    center = bbox_param[:2]
    scale = bbox_param[2]

    image_scaled, scale_factors = resize_img(image, scale)
    vis = gt2d[:, 2] > vis_thresh
    joints_scaled = np.copy(gt2d[:, :2])
    joints_scaled[:, 0] *= scale_factors[0]
    joints_scaled[:, 1] *= scale_factors[1]
    # The np.int alias was removed in NumPy 1.24; the builtin int selects
    # the same dtype.
    center_scaled = np.round(center * scale_factors).astype(int)

    # Make sure there is enough space to crop img_size x img_size.
    image_padded = np.pad(array=image_scaled,
                          pad_width=((img_size, ), (img_size, ), (0, )),
                          mode='edge')
    height, width = image_padded.shape[:2]
    # Shift center and joints into the padded image's coordinates.
    center_scaled += img_size
    joints_scaled += img_size

    # Crop img_size x img_size around the center.
    margin = img_size // 2

    start_pt = (center_scaled - margin).astype(int)
    end_pt = (center_scaled + margin).astype(int)
    # Points are (x, y): index 0 is clamped by width, index 1 by height.
    end_pt[0] = min(end_pt[0], width)
    end_pt[1] = min(end_pt[1], height)
    image_scaled = image_padded[start_pt[1]:end_pt[1],
                                start_pt[0]:end_pt[0], :]
    # Update others too.
    joints_scaled[:, 0] -= start_pt[0]
    joints_scaled[:, 1] -= start_pt[1]
    center_scaled -= start_pt
    height, width = image_scaled.shape[:2]
    im_shape = [height, width]

    if visualize:
        # gt2d cannot be None here: it was already indexed above, so the
        # skeletons are always drawn.
        image_with_skel = draw_skeleton(image, gt2d[:, :2], vis=vis)
        image_with_skel_scaled = draw_skeleton(image_scaled,
                                               joints_scaled[:, :2],
                                               vis=vis)

        plt.ion()
        plt.clf()
        fig = plt.figure(1)
        ax = fig.add_subplot(121)
        ax.imshow(image_with_skel)
        ax.axis('off')
        ax.scatter(center[0], center[1], color='red')
        ax = fig.add_subplot(122)

        ax.imshow(image_with_skel_scaled)
        ax.scatter(center_scaled[0], center_scaled[1], color='red')

        plt.show()
        plt.draw()
        plt.pause(5e-6)

    kps = np.vstack((joints_scaled.T, [vis]))

    return {
        'image_data_scaled': coder.encode_jpeg(image_scaled),
        'im_path': im_path,
        'im_shape': im_shape,
        'kps': kps,
        'center': center_scaled,
        'scale': scale,
        'start_pt': start_pt,
    }
Пример #6
0
def process_image(
    im_path,
    gt2d,
    cam,
    coder,
    pose=None,
    shape=None,
    gt3d=None,
    vis=False,
):
    """
    Reads a PNG, rescales it so the person spans ~150 px and crops up to
    300x300 around the person, updating joints and camera accordingly.

    Args:
        im_path (str): Path of the PNG file to read.
        gt2d (Nx2): 2D joints as [x, y]; also used to derive the scale and
            the crop center. Presumably N == len(COMMON_JOINT_IDS), since
            the label's last row is built with that length -- verify
            against the caller.
        cam (3,): [focal length, px, py].
        coder: Object providing png_to_jpeg, decode_jpeg and encode_jpeg.
        pose: Unused by this function.
        shape: Unused by this function.
        gt3d (Mx3): 3D joints; only used to overlay their projection when
            vis is True. Skipped if None.
        vis (bool): If True, plot the original and cropped images and drop
            into an ipdb breakpoint (debug aid).

    Returns:
        dict with keys:
            image_data: The crop encoded by coder.encode_jpeg.
            image: The cropped image array.
            image_shape: [height, width] of the crop.
            label: 3xN array, rows are x, y (crop coordinates) and ones.
            center: Crop center in crop coordinates.
            scale_factors: Scale factors returned by resize_img.
            start_pt: (x, y) origin of the crop in the padded, resized image.
            cam_scaled: Camera adjusted for the resize and the crop.
    """
    # Read image.
    with tf.gfile.FastGFile(im_path, 'rb') as f:
        image_data = f.read()
        image = coder.decode_jpeg(coder.png_to_jpeg(image_data))
        assert image.shape[2] == 3

    # Use gt2d to get the scale: the diagonal of the joints' bounding box
    # serves as the person height.
    min_pt = np.min(gt2d, axis=0)
    max_pt = np.max(gt2d, axis=0)
    person_height = np.linalg.norm(max_pt - min_pt)
    center = (min_pt + max_pt) / 2.
    scale = 150. / person_height

    image_scaled, scale_factors = resize_img(image, scale)
    joints_scaled = np.copy(gt2d)
    joints_scaled[:, 0] *= scale_factors[0]
    joints_scaled[:, 1] *= scale_factors[1]
    # The np.int alias was removed in NumPy 1.24; the builtin int selects
    # the same dtype.
    center_scaled = np.round(center * scale_factors).astype(int)
    # scale camera:
    cam_scaled = np.copy(cam)
    # Flength
    cam_scaled[0] *= scale
    # px
    cam_scaled[1] *= scale_factors[0]
    # py
    cam_scaled[2] *= scale_factors[1]

    # Make sure there is enough space to crop 300x300.
    image_padded = np.pad(image_scaled, ((300, ), (300, ), (0, )), 'edge')
    height, width = image_padded.shape[:2]
    # Shift center and joints into the padded image's coordinates.
    center_scaled += 300
    joints_scaled += 300

    # Crop 300x300 around the center.
    margin = 150
    start_pt = (center_scaled - margin).astype(int)
    end_pt = (center_scaled + margin).astype(int)
    # Points are (x, y): index 0 is clamped by width, index 1 by height.
    end_pt[0] = min(end_pt[0], width)
    end_pt[1] = min(end_pt[1], height)
    image_scaled = image_padded[start_pt[1]:end_pt[1],
                                start_pt[0]:end_pt[0], :]
    # Update others too.
    joints_scaled[:, 0] -= start_pt[0]
    joints_scaled[:, 1] -= start_pt[1]
    center_scaled -= start_pt
    # Update principal point: add the padding, subtract the crop origin.
    cam_scaled[1] += 300 - start_pt[0]
    cam_scaled[2] += 300 - start_pt[1]
    height, width = image_scaled.shape[:2]
    im_shape = [height, width]
    # Vis:
    if vis:
        import matplotlib.pyplot as plt
        plt.ion()
        plt.clf()
        fig = plt.figure(1)
        ax = fig.add_subplot(121)
        image_with_skel = draw_skeleton(image, gt2d[:, :2])
        ax.imshow(image_with_skel)
        ax.axis('off')
        ax.scatter(center[0], center[1], color='red')
        ax = fig.add_subplot(122)
        image_with_skel_scaled = draw_skeleton(image_scaled,
                                               joints_scaled[:, :2])
        ax.imshow(image_with_skel_scaled)
        ax.scatter(center_scaled[0], center_scaled[1], color='red')

        # Project it.
        def project(X, c):
            # Perspective divide, then apply focal length and principal
            # point.
            y = X[:, :2] / X[:, 2].reshape(-1, 1)
            proj2d = c[0] * y + c[1:].reshape(1, -1)
            return proj2d

        # gt3d defaults to None; only overlay the projection when given.
        if gt3d is not None:
            proj2d = project(gt3d, cam_scaled)
            ax.scatter(proj2d[:, 0], proj2d[:, 1], s=4)
        ax.axis('off')
        # Deliberate debugging breakpoint so the figure can be inspected.
        import ipdb
        ipdb.set_trace()
    # Encode image.
    image_data_scaled = coder.encode_jpeg(image_scaled)
    # Put things together.
    label = np.vstack([joints_scaled.T, np.ones((1, len(COMMON_JOINT_IDS)))])

    return {
        'image_data': image_data_scaled,
        'image': image_scaled,
        'image_shape': im_shape,
        'label': label,
        'center': center_scaled,
        'scale_factors': scale_factors,
        'start_pt': start_pt,
        'cam_scaled': cam_scaled,
    }