def resize(self, image, keypoints, bbox, size):
        _, h, w = image.shape
        new_h, new_w = size

        image = transforms.resize(image, (new_h, new_w))
        keypoints = [
            transforms.resize_point(points, (h, w), (new_h, new_w))
            for points in keypoints
        ]
        new_bbox = []
        # bbox is (x, y, width, height); resize_point expects (y, x) points
        for x, y, bw, bh in bbox:
            [[y, x]] = transforms.resize_point(
                np.array([[y, x]]), (h, w), (new_h, new_w))
            bw *= new_w / w
            bh *= new_h / h
            new_bbox.append([x, y, bw, bh])
        return image, keypoints, new_bbox
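A quick sanity check of the (y, x) scaling that resize_point applies (a minimal sketch, assuming chainercv and numpy are installed):

import numpy as np
from chainercv import transforms

point = np.array([[8., 16.]])  # one (y, x) point in a 16x32 image
out = transforms.resize_point(point, in_size=(16, 32), out_size=(8, 64))
print(out)  # [[ 4. 32.]] -- y is halved, x is doubled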
def resize_contain(image, joint_zyx, camera, size, fill=0, return_param=False):
    _, inH, inW = image.shape
    resized, resize_param = transforms.resize_contain(
        image,
        size=size,
        return_param=True,
        fill=fill,
    )
    y_scale, x_scale = resize_param["scaled_size"] / np.array([inH, inW])

    # Scaling the intrinsics and shifting the principal point projects the 3D
    # joints directly into the resized image; this is equivalent to applying
    # resize_point and translate_point to the projected 2D points.
    camera_scaled = camera.scale_camera(y_scale=y_scale, x_scale=x_scale)
    camera_resized = camera_scaled.translate_camera(
        y_offset=resize_param["y_offset"], x_offset=resize_param["x_offset"])
    vu = camera_resized.zyx2vu(joint_zyx)
    return resized, vu, camera_resized
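The camera-side warp above is equivalent to warping the projected points. A toy pinhole model illustrates this (the intrinsics and the 3D point are made-up values, not the repo's camera class):

import numpy as np

def project(fx, fy, cx, cy, xyz):
    # pinhole projection returning (v, u) = (row, col)
    x, y, z = xyz.T
    return np.stack([fy * y / z + cy, fx * x / z + cx], axis=1)

xyz = np.array([[0.1, -0.2, 1.5]])
vu = project(200., 200., 64., 64., xyz)
y_scale, x_scale, y_off, x_off = 0.5, 0.5, 10., 20.
# resize_point + translate_point on the 2D points...
vu_pts = vu * [y_scale, x_scale] + [y_off, x_off]
# ...equals projecting with scaled-and-shifted intrinsics
vu_cam = project(200. * x_scale, 200. * y_scale,
                 64. * x_scale + x_off, 64. * y_scale + y_off, xyz)
assert np.allclose(vu_pts, vu_cam)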
    def test_resize_point(self):
        point = np.random.uniform(low=0., high=32., size=(12, 2))

        out = resize_point(point, in_size=(16, 32), out_size=(8, 64))
        point[:, 0] *= 0.5
        point[:, 1] *= 2
        np.testing.assert_equal(out, point)
Example 4
def point2heatmap(points, indices, input_shape):
    """convert keypoint of a person to heatmap
    Args:
        points: np.ndarray, shape [16, 2] (y, x)
        indices: np.ndarray, available point or not shape [16, ]
        input_shape: tuple, input shape of an image of the person

    Returns:
        heatmap: shape [16, 64, 64]
    """
    points = transforms.resize_point(points, input_shape, (64, 64))
    # (y, x) -> (x, y)
    points = points[:, ::-1]
    # pose-hg-train/src/utils/img.lua drawGaussian
    # pose-hg-train/src/utils/pose.lua generateSample
    heatmap = np.zeros((16, 64, 64), dtype=np.float32)
    sigma = 1.0
    # (x, y) pixel grid over the 64x64 heatmap, computed once
    coordinates = np.array(np.meshgrid(range(64), range(64)))
    for i, (available, point) in enumerate(zip(indices, points)):
        if available:
            diff = coordinates - point[:, None, None]
            dist = np.sum(diff**2, axis=0)
            heatmap[i] = np.exp(-dist / (2 * sigma**2))

    return heatmap
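A hypothetical call (the point values are made up): a single visible joint produces a Gaussian that peaks at 1.0 at its pixel.

import numpy as np

points = np.zeros((16, 2), dtype=np.float32)
points[0] = [32., 48.]                 # (y, x) in a 64x64 input
indices = np.zeros(16, dtype=bool)
indices[0] = True
heatmap = point2heatmap(points, indices, input_shape=(64, 64))
print(heatmap[0, 32, 48])              # 1.0 -- the Gaussian peak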
Example 6
    def test_resize_point_list(self):
        point = [
            np.random.uniform(low=0., high=32., size=(12, 2)),
            np.random.uniform(low=0., high=32., size=(10, 2))
        ]

        out = resize_point(point, in_size=(16, 32), out_size=(8, 64))
        for i, pnt in enumerate(point):
            pnt[:, 0] *= 0.5
            pnt[:, 1] *= 2
            np.testing.assert_equal(out[i], pnt)
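As the test exercises, resize_point also accepts a list of per-instance point arrays with different numbers of points; a minimal check (assuming a chainercv version with list support, as above):

import numpy as np
from chainercv.transforms import resize_point

points = [np.array([[8., 16.]]), np.array([[4., 8.], [2., 4.]])]
out = resize_point(points, in_size=(16, 32), out_size=(8, 64))
print(out[0])  # [[ 4. 32.]]
print(out[1])  # [[ 2. 16.]
               #  [ 1.  8.]]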
def crop_around_3d_center(subject_id, action, seq_idx, frame_id):
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_subplot(221)
    ax2 = fig.add_subplot(222)
    ax3 = fig.add_subplot(223, projection="3d")
    label_3d(ax3)
    ax3.view_init(-90, -90)
    example = get_example(subject_id, action, seq_idx, frame_id)
    joints_zyx = example["world_joints"][:, ::-1]
    vu, z_ = zyx2depth_vu(joints_zyx, return_z=True)
    vu_com, z_com = calc_com(vu, z_)
    zyx_com = depth_vu2zyx(vu_com[np.newaxis], z_com[np.newaxis]).squeeze()
    z_com, y_com, x_com = zyx_com
    xmin = x_com - crop3dW / 2
    ymin = y_com - crop3dH / 2
    xmax = x_com + crop3dW / 2
    ymax = y_com + crop3dH / 2
    [[vmin, umin], [vmax, umax]] = zyx2depth_vu(np.array([
        [z_com, ymin, xmin],
        [z_com, ymax, xmax],
    ])).astype(int)
    domain = [vmin, umin, vmax, umax]
    depth = example["depth"]
    cropped, crop_param = crop_domain(depth, domain)
    vu = np.expand_dims(vu, axis=0)
    vu = transforms.translate_point(
        vu,
        y_offset=crop_param["y_offset"],
        x_offset=crop_param["x_offset"]
    )
    _, inH, inW = cropped.shape

    if inH < crop2dH or inW < crop2dW:
        cropped = chainercv.transforms.scale(
            cropped, size=max(crop2dH, crop2dW), fit_short=True)
        vu = transforms.resize_point(
            vu,
            in_size=(inH, inW),
            out_size=cropped.shape[1:],
        )
        _, inH, inW = cropped.shape

    resized, resize_param = transforms.resize_contain(
        cropped,
        size=(crop2dH, crop2dW),
        return_param=True,
        fill=define_background(cropped),
    )
    vu = transforms.resize_point(
        vu,
        in_size=(inH, inW),
        out_size=resize_param["scaled_size"]
    )
    vu = transforms.translate_point(
        vu,
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"]
    )
    # visualize
    color = [COLOR_MAP[k] for k in KEYPOINT_NAMES]
    vis_image(resized, ax=ax1)
    print(z_com, z_com-crop3dD/2, z_com+crop3dD/2)
    normalized = normalize_depth(resized, z_com, z_size=crop3dD)
    vis_image(normalized, ax=ax2)
    vis_point(point=vu, ax=ax1, color=color)
    vis_point(point=vu, ax=ax2, color=color)
    cropped_zyx = joints_zyx-zyx_com
    vis_point(point=[cropped_zyx], ax=ax3, color=color)
    edge_color = [COLOR_MAP[s, t] for s, t in EDGES]
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax1)
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax2)
    vis_edges(point=[cropped_zyx], indices=EDGES, color=edge_color, ax=ax3)
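The keypoint bookkeeping above (crop, then resize) boils down to one translate_point and one resize_point per warp. A minimal sketch with made-up offsets and sizes (crop_domain and the dataset helpers are repo-specific):

import numpy as np
from chainercv import transforms

vu = np.array([[120., 200.]])   # (K, 2) points as (v, u) = (row, col)
# a crop starting at (ymin, xmin) = (100, 150) shifts points by the
# negative offsets
vu = transforms.translate_point(vu, y_offset=-100, x_offset=-150)
# resizing the 80x100 crop to 160x200 doubles both coordinates
vu = transforms.resize_point(vu, in_size=(80, 100), out_size=(160, 200))
print(vu)  # [[ 40. 100.]]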
def crop_around_3d_center(subject_id, action, seq_idx, frame_id):
    fig = plt.figure(figsize=(10, 5))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122, projection="3d")
    label_3d(ax2)
    ax2.view_init(-90, -90)
    example = get_example(subject_id, action, seq_idx, frame_id)
    cam_joints_zyx = example["cam_joints"][:, ::-1]
    vu, z_ = zyx2vu(cam_joints_zyx, return_z=True)
    vu_com, z_com = calc_com(vu, z_)
    zyx_com = vu2zyx(vu_com[np.newaxis], z_com[np.newaxis]).squeeze()
    z_com, y_com, x_com = zyx_com
    xmin = x_com - crop3dW / 2
    ymin = y_com - crop3dH / 2
    xmax = x_com + crop3dW / 2
    ymax = y_com + crop3dH / 2
    [[vmin, umin], [vmax, umax]] = zyx2vu(np.array([
        [z_com, ymin, xmin],
        [z_com, ymax, xmax],
    ])).astype(int)
    domain = [vmin, umin, vmax, umax]
    img = example["image"]

    cropped, crop_param = crop_domain(img, domain)
    vu = np.expand_dims(vu, axis=0)
    vu = transforms.translate_point(
        vu,
        y_offset=crop_param["y_offset"],
        x_offset=crop_param["x_offset"]
    )
    _, inH, inW = cropped.shape
    resized, resize_param = transforms.resize_contain(
        cropped,
        size=(crop2dH, crop2dW),
        return_param=True
    )
    vu = transforms.resize_point(
        vu, in_size=(inH, inW), out_size=resize_param["scaled_size"])
    vu = transforms.translate_point(
        vu,
        y_offset=resize_param["y_offset"],
        x_offset=resize_param["x_offset"]
    )
    # visualize
    color = [COLOR_MAP[k] for k in KEYPOINT_NAMES]
    chainercv.visualizations.vis_image(resized, ax=ax1)
    vis_point(point=vu, ax=ax1, color=color)
    cropped_zyx = cam_joints_zyx-zyx_com
    vis_point(point=[cropped_zyx], ax=ax2, color=color)
    edge_color = [COLOR_MAP[s, t] for s, t in EDGES]
    vis_edges(point=vu, indices=EDGES, color=edge_color, ax=ax1)
    vis_edges(point=[cropped_zyx], indices=EDGES, color=edge_color, ax=ax2)
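For reference, resize_contain's param dict provides exactly the fields consumed above; a sketch on a toy image (chainercv):

import numpy as np
from chainercv import transforms

img = np.zeros((3, 32, 64), dtype=np.float32)
out, param = transforms.resize_contain(img, size=(64, 64), return_param=True)
print(param["scaled_size"])                   # 32x64: aspect ratio preserved
print(param["y_offset"], param["x_offset"])   # 16 0: centered vertically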
Example 9
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument(
        '--model',
        default='',
        help='if not specified, you download and use a pre-trained model.')
    parser.add_argument('--snapshot', default='')
    parser.add_argument('--image', type=str)

    args = parser.parse_args()
    if not args.image:
        raise ValueError('args.image should be specified.')
    args.image = os.path.expanduser(args.image)

    model = StackedHG(16)

    if args.model:
        chainer.serializers.load_npz(args.model, model)

    elif args.snapshot:
        chainer.serializers.load_npz(snap2model_trainer(args.snapshot), model)

    else:
        model_path = './models/model_2018_05_22.npz'
        if not os.path.exists(model_path):
            os.makedirs("models", exist_ok=True)
            url = "https://github.com/fujibo/poseHG/releases/download/1.0.1/model_2018_05_22.npz"
            wget.download(url, model_path)

        chainer.serializers.load_npz(model_path, model)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    chainer.config.train = False

    img = utils.read_image(args.image)
    img = img / 255.
    img = img.astype(np.float32)

    # expected input properties:
    # - a person is centered in the image
    # - the image height is 1.25x the person's scale (roughly the height)
    img_resized = transforms.resize(img, (256, 256))
    img_resized = img_resized[np.newaxis]

    if args.gpu >= 0:
        img_resized = cuda.to_gpu(img_resized)

    with chainer.no_backprop_mode():
        # (1, 3, 256, 256) -> (1, 16, 64, 64) -> (16, 64, 64)
        _output, output = model(img_resized)
    output = cuda.to_cpu(output.array)[0]

    C, H, W = output.shape

    # (16, 64, 64) -> (16, )
    output = output.reshape(C, -1).argmax(axis=1)
    keypoint = np.unravel_index(output, (H, W))
    keypoint = np.array(keypoint).T
    keypoint = transforms.resize_point(keypoint, (H, W), img.shape[1:])

    img = cv2.imread(args.image)
    visualizer = MPIIVisualizer()
    img_pose = visualizer.run(img, keypoint)

    cv2.imwrite('input.jpg', img)
    cv2.imwrite('output.jpg', img_pose)
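The argmax decoding in the middle of main generalizes to any heatmap stack; a self-contained sketch with random data:

import numpy as np
from chainercv import transforms

H = W = 64
heatmaps = np.random.rand(16, H, W).astype(np.float32)
flat = heatmaps.reshape(16, -1).argmax(axis=1)            # (16,)
keypoint = np.array(np.unravel_index(flat, (H, W))).T     # (16, 2) as (y, x)
# map from heatmap coordinates back to a 480x640 source image
keypoint = transforms.resize_point(keypoint, (H, W), (480, 640))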
Example 10
def evaluate(model, dataset, device=-1, flip=False):

    batch_size = 50
    data_iter = chainer.iterators.MultithreadIterator(dataset,
                                                      batch_size,
                                                      repeat=False,
                                                      shuffle=False)

    corrects = list()
    counts = list()

    for it, batch in enumerate(data_iter):
        # print progress
        print(f'{batch_size*it:04d} / {len(dataset):04d}', end='\r')

        img, label, idx, scale, shape = concat_examples(batch)
        N, C, H, W = img.shape

        if flip:
            img = np.array((img, img[:, :, :, ::-1]))
            img = img.reshape(N * 2, C, H, W)

        if device >= 0:
            img = cuda.to_gpu(img)

        with chainer.no_backprop_mode():
            # (N, 3, 256, 256) -> (N, 16, 64, 64)
            _output, output = model(img)

        output = output.array

        if flip:
            # average the heatmaps of the original and the flipped image
            output = output.reshape((2, N) + output.shape[1:])
            output_flipped = flip_heatmap(output[1], copy=True)
            output = (output[0] + output_flipped) / 2

        N, C, H, W = output.shape

        keypoints = list()
        # (N, 16, 64, 64) -> (N, 16, 2)
        for i in range(N):
            # (16, 64, 64) -> (16,)
            out_reshaped = output[i].reshape(C, -1).argmax(axis=1)
            out_reshaped = cuda.to_cpu(out_reshaped)
            keypoint = np.unravel_index(out_reshaped, (H, W))
            # (2, 16) -> (16, 2)
            keypoint = np.array(keypoint).T
            keypoint = transforms.resize_point(keypoint, (H, W), shape[i])
            keypoints.append(keypoint)
        keypoints = np.array(keypoints)

        correct, count = pckh_score(label, keypoints, idx, scale)
        corrects.append(correct)
        counts.append(count)

    print()
    corrects = np.sum(corrects, axis=0)
    counts = np.sum(counts, axis=0)
    # Head, Shoulder, Elbow, Wrist, Hip, Knee, Ankle
    joints = {
        'head': [8, 9],
        'shoulder': [12, 13],
        'elbow': [11, 14],
        'wrist': [10, 15],
        'hip': [2, 3],
        'knee': [1, 4],
        'ankle': [0, 5]
    }

    scores = dict()
    for key, value in joints.items():
        score = corrects[value].sum() / counts[value].sum()
        scores.update({key: score})
    return scores
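A toy run of the final per-group aggregation (the corrects/counts values are made up):

import numpy as np

corrects = np.arange(16)          # made-up per-joint correct counts
counts = np.full(16, 10)          # made-up per-joint totals
head = [8, 9]
print(corrects[head].sum() / counts[head].sum())  # (8 + 9) / 20 = 0.85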
Example 11
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument(
        '--model',
        default='',
        help='if not specified, you download and use a pre-trained model.')
    parser.add_argument('--snapshot', default='')
    parser.add_argument('--image', type=str)

    args = parser.parse_args()
    if not args.image:
        raise ValueError('args.image should be specified.')
    args.image = os.path.expanduser(args.image)

    detector = SSD512(pretrained_model='voc0712')
    model = StackedHG(16)

    if args.model:
        chainer.serializers.load_npz(args.model, model)

    elif args.snapshot:
        chainer.serializers.load_npz(snap2model_trainer(args.snapshot), model)

    else:
        # pre-trained model
        model_path = './models/model_2018_05_22.npz'
        if not os.path.exists(model_path):
            gdd.download_file_from_google_drive(
                file_id='1rZZJRpqQKkncn30Igtk8KirgR96QlCFO',
                dest_path=model_path)

        chainer.serializers.load_npz(model_path, model)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        detector.to_gpu()
        model.to_gpu()

    chainer.config.train = False

    img = utils.read_image(args.image)

    # detect persons
    bboxes, labels, scores = detector.predict([img])
    bbox, label, score = bboxes[0], labels[0], scores[0]

    # expand bboxes and crop the image
    img = img / 255.
    img = img.astype(np.float32)

    img_persons = list()
    bbox_persons = list()
    for ymin, xmin, ymax, xmax in bbox:
        scale = ymax - ymin

        # shift the crop center downward to cover the ankles
        # (the same offset is used in training with the MPII dataset)
        offset = 15 / 200 * scale
        center = (xmin + xmax) / 2, (ymin + ymax) / 2 + offset

        # enlarge the crop for the same reason, as in MPII training
        scale *= 1.25

        xmin, xmax = center[0] - scale / 2, center[0] + scale / 2
        ymin, ymax = center[1] - scale / 2, center[1] + scale / 2

        # truncate
        xmin = int(max(0, xmin))
        ymin = int(max(0, ymin))
        xmax = int(min(img.shape[2], xmax))
        ymax = int(min(img.shape[1], ymax))

        # cropping
        img_person = img[:, ymin:ymax, xmin:xmax]
        img_person = transforms.resize(img_person, (256, 256))

        img_persons.append(img_person)
        bbox_persons.append((ymin, xmin, ymax, xmax))

    img_persons = np.array(img_persons)
    bbox_persons = np.array(bbox_persons)

    utils.write_image(
        utils.tile_images((255 * img_persons).astype(np.float32), n_col=2),
        'tiled.jpg')
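Worked numbers for the box expansion in the loop above (a made-up detection):

ymin, ymax, xmin, xmax = 100., 300., 150., 250.
scale = ymax - ymin                               # 200.0
offset = 15 / 200 * scale                         # 15.0 (ankle headroom)
center = ((xmin + xmax) / 2, (ymin + ymax) / 2 + offset)  # (200.0, 215.0)
side = scale * 1.25                               # 250.0 -> the square crop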

    # estimate poses
    if args.gpu >= 0:
        img_persons = cuda.to_gpu(img_persons)

    with chainer.no_backprop_mode():
        # (R, 3, 256, 256) -> (R, 16, 64, 64) -> (16, 64, 64)
        _outputs, outputs = model(img_persons)
    outputs = cuda.to_cpu(outputs.array)

    R, C, H, W = outputs.shape

    # heatmap to keypoint
    # R, C, H, W -> R, C, 2
    keypoints = list()
    for output in outputs:
        # (16, 64, 64) -> (16, )
        output = output.reshape(C, -1).argmax(axis=1)
        keypoint = np.unravel_index(output, (H, W))
        keypoint = np.array(keypoint).T
        keypoints.append(keypoint)

    # keypoint (local) to keypoint (global)
    keypoint_persons = list()
    for keypoint, bbox_person in zip(keypoints, bbox_persons):
        ymin, xmin, ymax, xmax = bbox_person
        keypoint = transforms.resize_point(keypoint, (H, W),
                                           (ymax - ymin, xmax - xmin))
        keypoint_person = keypoint + np.array((ymin, xmin))
        keypoint_persons.append(keypoint_person)

    # visualize
    img = cv2.imread(args.image)
    visualizer = MPIIVisualizer()

    img_pose = img.copy()
    for keypoint_person, bbox_person in zip(keypoint_persons, bbox_persons):
        ymin, xmin, ymax, xmax = bbox_person

        img_pose = visualizer.run(img_pose, keypoint_person)
        img_pose = cv2.rectangle(img_pose, (xmin, ymin), (xmax, ymax),
                                 (0, 255, 255), 10)

    cv2.imwrite('input.jpg', img)
    cv2.imwrite('output.jpg', img_pose)
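A hypothetical invocation (the script name and image path are examples):

#   python demo.py --gpu 0 --image ~/pictures/group.jpg
# writes input.jpg (the raw photo) and output.jpg with the detected person
# boxes and estimated poses drawn on top.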