Example #1
def get_chair_mask(i_subject, i_seq, i_cam, i_frame):
    p = f'{paths.DATA_ROOT}/3dhp/S{i_subject}/Seq{i_seq}/ChairMasks/img_{i_cam}_{i_frame:06d}.jpg'
    chroma_frame = improc.imread_jpeg(p)
    is_fg = chroma_frame[..., 0] < 32
    mask, objbox = improc.largest_connected_component(is_fg)
    encoded_mask = improc.encode_mask(mask)
    return encoded_mask
Example #2
def get_mask(i_subject, i_seq, i_cam, i_frame):
    chroma_frame = improc.imread_jpeg(
        f'{paths.DATA_ROOT}/3dhp/S{i_subject}/Seq{i_seq}/FGmasks/img_{i_cam}_{i_frame:06d}.jpg'
    )
    person_box = get_box(i_subject, i_seq, i_cam, i_frame)

    is_fg = chroma_frame[..., 0] > 100
    n_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
        is_fg.astype(np.uint8), 4, cv2.CV_32S)
    component_boxes = stats[:, :4]
    ious = [
        boxlib.iou(component_box, person_box)
        for component_box in component_boxes
    ]
    ious[0] = 0  # rule out the background component (stats row 0)
    person_label = np.argmax(ious)
    mask = (labels == person_label).astype(np.uint8)

    # Remove foreground pixels that are far from the person box
    intbox = boxlib.intersect(boxlib.full_box((2048, 2048)),
                              boxlib.expand(person_box, 1.3)).astype(int)
    mask[:intbox[1]] = 0
    mask[:, :intbox[0]] = 0
    mask[:, intbox[0] + intbox[2]:] = 0
    mask[intbox[1] + intbox[3]:] = 0
    encoded_mask = improc.encode_mask(mask)
    return encoded_mask
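
The component-selection idea above, as a minimal self-contained sketch using only NumPy and OpenCV (the helper names iou_xywh and component_with_highest_iou are hypothetical, not part of this codebase):

import cv2
import numpy as np

def iou_xywh(box_a, box_b):
    # Intersection-over-union of two boxes given as (x, y, width, height)
    ax, ay, aw, ah = box_a
    bx, by, bw, bh = box_b
    ix = max(0.0, min(ax + aw, bx + bw) - max(ax, bx))
    iy = max(0.0, min(ay + ah, by + bh) - max(ay, by))
    inter = ix * iy
    union = aw * ah + bw * bh - inter
    return inter / union if union > 0 else 0.0

def component_with_highest_iou(binary_mask, ref_box):
    # Label 4-connected components; stats rows are (x, y, w, h, area), row 0 is background
    n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
        binary_mask.astype(np.uint8), 4, cv2.CV_32S)
    ious = [iou_xywh(stats[i, :4], ref_box) for i in range(n_labels)]
    ious[0] = 0  # never select the background component
    return (labels == int(np.argmax(ious))).astype(np.uint8)
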
Example #3
def make_efficient_example(ex,
                           further_expansion_factor=1,
                           further_scale_up=1,
                           dir_suffix=''):
    """Make example by storing the image in a cropped and resized version for efficient loading"""

    # Determine which area we will need from the image
    # This is a bit larger than the tight crop because of the geometric augmentations
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85 * further_scale_up
    scale_down_factor = 1 / 0.85
    shift_factor = 1.1
    base_dst_side = 256

    box_center = boxlib.center(ex.bbox)
    s, c = np.sin(max_rotate), np.cos(max_rotate)
    w, h = ex.bbox[2:]
    rot_bbox_side = max(c * w + s * h, c * h + s * w)
    rot_bbox = boxlib.box_around(box_center, rot_bbox_side)

    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor,
                       1)
    expansion_factor = (padding_factor * shift_factor * scale_down_factor *
                        further_expansion_factor)
    expanded_bbox = boxlib.expand(rot_bbox, expansion_factor)
    expanded_bbox = boxlib.intersect(expanded_bbox,
                                     np.array([0, 0, 1000, 1000]))

    new_camera = copy.deepcopy(ex.camera)
    new_camera.intrinsic_matrix[:2, 2] -= expanded_bbox[:2]
    new_camera.scale_output(scale_factor)
    new_camera.undistort()

    new_im_relpath = ex.image_path.replace('h36m',
                                           f'h36m_downscaled{dir_suffix}')
    new_im_path = f'{paths.DATA_ROOT}/{new_im_relpath}'
    if not (util.is_file_newer(new_im_path, "2019-11-14T23:33:14")
            and improc.is_image_readable(new_im_path)):
        im = improc.imread_jpeg(ex.image_path)
        dst_shape = improc.rounded_int_tuple(scale_factor *
                                             expanded_bbox[[3, 2]])
        new_im = cameralib.reproject_image(im, ex.camera, new_camera,
                                           dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, new_im)

    new_bbox_topleft = cameralib.reproject_image_points(
        ex.bbox[:2], ex.camera, new_camera)
    new_bbox = np.concatenate([new_bbox_topleft, ex.bbox[2:] * scale_factor])
    ex = ps3d.Pose3DExample(new_im_relpath,
                            ex.world_coords,
                            new_bbox,
                            new_camera,
                            activity_name=ex.activity_name)
    return ex
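
A quick numeric check (not part of the pipeline) that the crop-side formula above, max(c*w + s*h, c*h + s*w), bounds the axis-aligned extent of a w-by-h box under any in-plane rotation up to max_rotate, at least for the tall person boxes it is applied to here:

import numpy as np

w, h, max_rotate = 300.0, 500.0, np.pi / 6
s, c = np.sin(max_rotate), np.cos(max_rotate)
bound = max(c * w + s * h, c * h + s * w)
corners = np.array([[-w, -h], [w, -h], [w, h], [-w, h]]) / 2

for theta in np.linspace(-max_rotate, max_rotate, 101):
    rot = np.array([[np.cos(theta), -np.sin(theta)],
                    [np.sin(theta), np.cos(theta)]])
    rotated = corners @ rot.T
    extent = rotated.max(axis=0) - rotated.min(axis=0)
    assert np.all(extent <= bound + 1e-9)
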
Example #4
def load_occluders():
    image_mask_pairs = []
    pascal_root = f'{paths.DATA_ROOT}/pascal_voc'
    for annotation_path in glob.glob(f'{pascal_root}/Annotations/*.xml'):
        xml_root = xml.etree.ElementTree.parse(annotation_path).getroot()
        is_segmented = (xml_root.find('segmented').text != '0')

        if not is_segmented:
            continue

        boxes = []
        for i_obj, obj in enumerate(xml_root.findall('object')):
            is_person = (obj.find('name').text == 'person')
            is_difficult = (obj.find('difficult').text != '0')
            is_truncated = (obj.find('truncated').text != '0')
            if not is_person and not is_difficult and not is_truncated:
                bndbox = obj.find('bndbox')
                box = [
                    int(bndbox.find(s).text)
                    for s in ['xmin', 'ymin', 'xmax', 'ymax']
                ]
                boxes.append((i_obj, box))

        if not boxes:
            continue

        image_filename = xml_root.find('filename').text
        segmentation_filename = image_filename.replace('jpg', 'png')

        path = f'{pascal_root}/JPEGImages/{image_filename}'
        seg_path = f'{pascal_root}/SegmentationObject/{segmentation_filename}'

        im = improc.imread_jpeg(path)
        labels = np.asarray(PIL.Image.open(seg_path))

        for i_obj, (xmin, ymin, xmax, ymax) in boxes:
            object_mask = (labels[ymin:ymax,
                                  xmin:xmax] == i_obj + 1).astype(np.uint8)
            object_image = im[ymin:ymax, xmin:xmax]
            # Ignore small objects
            if cv2.countNonZero(object_mask) < 500:
                continue

            object_mask = soften_mask(object_mask)
            downscale_factor = 0.5
            object_image = improc.resize_by_factor(object_image,
                                                   downscale_factor)
            object_mask = improc.resize_by_factor(object_mask,
                                                  downscale_factor)
            image_mask_pairs.append((object_image, object_mask))

    return image_mask_pairs
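
A hedged sketch of how such (image, mask) pairs are typically used afterwards: alpha-compositing a random occluder onto a training image. The helper paste_occluder is hypothetical and assumes a 2-D softened mask holding alpha values in [0, 1]:

import numpy as np

def paste_occluder(im, occluder_im, occluder_mask, center):
    im = im.copy()
    h, w = occluder_im.shape[:2]
    x0, y0 = int(center[0]) - w // 2, int(center[1]) - h // 2
    # Clip the paste region to the image bounds
    x1, y1 = max(x0, 0), max(y0, 0)
    x2, y2 = min(x0 + w, im.shape[1]), min(y0 + h, im.shape[0])
    if x2 <= x1 or y2 <= y1:
        return im
    src = occluder_im[y1 - y0:y2 - y0, x1 - x0:x2 - x0].astype(np.float32)
    alpha = occluder_mask[y1 - y0:y2 - y0, x1 - x0:x2 - x0, np.newaxis]
    alpha = np.clip(alpha.astype(np.float32), 0, 1)
    dst = im[y1:y2, x1:x2].astype(np.float32)
    im[y1:y2, x1:x2] = (alpha * src + (1 - alpha) * dst).astype(im.dtype)
    return im
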
Example #5
def make_efficient_example(ex, root_muco, i_person):
    image_relpath = ex.image_path
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.2
    base_dst_side = 256
    box_center = boxlib.center(ex.bbox)
    s = np.sin(max_rotate)
    c = np.cos(max_rotate)
    # Axis-aligned extent of the bbox after an in-plane rotation by max_rotate
    rot_bbox_size = (np.array([[c, s], [s, c]]) @ ex.bbox[2:, np.newaxis])[:, 0]
    side = np.max(rot_bbox_size)
    rot_bbox_size = np.array([side, side])
    rot_bbox = boxlib.box_around(box_center, rot_bbox_size)

    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor, 1)
    expansion_factor = padding_factor * shift_factor * scale_down_factor
    expanded_bbox = boxlib.expand(rot_bbox, expansion_factor)
    expanded_bbox = boxlib.intersect(expanded_bbox, boxlib.full_box([2048, 2048]))

    new_camera = ex.camera.copy()
    new_camera.intrinsic_matrix[:2, 2] -= expanded_bbox[:2]
    new_camera.scale_output(scale_factor)
    new_camera.undistort()

    dst_shape = improc.rounded_int_tuple(scale_factor * expanded_bbox[[3, 2]])
    new_im_path = f'{root_muco}_downscaled/{image_relpath[:-4]}_{i_person:01d}.jpg'
    if not (util.is_file_newer(new_im_path, "2020-02-15T23:28:26")):
        im = improc.imread_jpeg(f'{root_muco}/{image_relpath}')
        new_im = cameralib.reproject_image(im, ex.camera, new_camera, dst_shape, antialias_factor=4)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, new_im, quality=95)

    new_bbox_topleft = cameralib.reproject_image_points(ex.bbox[:2], ex.camera, new_camera)
    new_bbox = np.concatenate([new_bbox_topleft, ex.bbox[2:] * scale_factor])

    if ex.mask is None:
        # image_relpath ends with '.jpg'; strip the extension
        noext = image_relpath[:-4].replace('unaugmented_set_001/', '')
        mask = improc.decode_mask(util.load_pickle(f'{root_muco}/masks/{noext}.pkl'))
    else:
        mask = ex.mask

    if mask is False:
        new_mask_encoded = None
    else:
        new_mask = cameralib.reproject_image(mask, ex.camera, new_camera, dst_shape)
        new_mask_encoded = improc.encode_mask(new_mask)

    return p3ds.Pose3DExample(
        os.path.relpath(new_im_path, paths.DATA_ROOT), ex.world_coords.astype(np.float32),
        new_bbox.astype(np.float32), new_camera, mask=new_mask_encoded,
        univ_coords=ex.univ_coords.astype(np.float32))
Example #6
def augment_background(im, fgmask, rng):
    path = util.choice(get_inria_holiday_background_paths(), rng)
    background_im = improc.imread_jpeg(path)

    cam = cameralib.Camera.create2D(background_im.shape)
    cam_new = cam.copy()

    zoom_aug_factor = rng.uniform(1.2, 1.5)
    cam_new.zoom(zoom_aug_factor *
                 np.max(im.shape[:2] / np.asarray(background_im.shape[:2])))
    cam_new.center_principal_point(im.shape)
    cam_new.shift_image(util.random_uniform_disc(rng) * im.shape[0] * 0.1)
    warped_background_im = cameralib.reproject_image(background_im, cam,
                                                     cam_new, im.shape)
    return improc.blend_image(warped_background_im, im, fgmask)
Example #7
def make_efficient_example(ex, rect_id):
    """Make example by storing the image in a cropped and resized version for efficient loading"""

    # Determine the image area we will need:
    # extra room around the box for rotation, padding, scale (shrink) augmentation and shifting
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.1
    max_rotate = np.pi / 6
    rot_factor = np.sin(max_rotate) + np.cos(max_rotate)
    base_dst_side = 256

    scale_factor = min(base_dst_side / ex.bbox[3] * scale_up_factor, 1)
    hopeful_factor = 0.9
    expansion_factor = (
            rot_factor * padding_factor * shift_factor * scale_down_factor * hopeful_factor)

    expanded_bbox = boxlib.expand(boxlib.expand_to_square(ex.bbox), expansion_factor)
    imsize = improc.image_extents(ex.image_path)
    full_box = np.array([0, 0, imsize[0], imsize[1]])
    expanded_bbox = boxlib.intersect(expanded_bbox, full_box)

    old_camera = cameralib.Camera.create2D()
    new_camera = old_camera.copy()
    new_camera.shift_image(-expanded_bbox[:2])
    new_camera.scale_output(scale_factor)

    dst_shape = improc.rounded_int_tuple(scale_factor * expanded_bbox[[3, 2]])
    new_im_path = ex.image_path.replace('mpii', 'mpii_downscaled')
    without_ext, ext = os.path.splitext(new_im_path)
    new_im_path = f'{without_ext}_{rect_id:02d}{ext}'

    if not (util.is_file_newer(new_im_path, "2019-11-12T17:54:06") and
            improc.is_image_readable(new_im_path)):
        im = improc.imread_jpeg(ex.image_path)
        new_im = cameralib.reproject_image(im, old_camera, new_camera, dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, new_im)

    new_bbox_topleft = cameralib.reproject_image_points(ex.bbox[:2], old_camera, new_camera)
    new_bbox = np.concatenate([new_bbox_topleft, ex.bbox[2:] * scale_factor])
    new_coords = cameralib.reproject_image_points(ex.coords, old_camera, new_camera)
    ex = Pose2DExample(os.path.relpath(new_im_path, paths.DATA_ROOT), new_coords, bbox=new_bbox)
    return ex
Example #8
def augment_background(im, fgmask, rng):
    path = util.choice(get_inria_holiday_background_paths(), rng)
    background_im = improc.imread_jpeg(path)

    cam = cameralib.Camera.create2D(background_im.shape)
    cam_new = cam.copy()

    zoom_aug_factor = rng.uniform(1.2, 1.5)
    cam_new.zoom(zoom_aug_factor *
                 np.max(im.shape[:2] / np.asarray(background_im.shape[:2])))
    cam_new.center_principal_point(im.shape)
    cam_new.shift_image(util.random_uniform_disc(rng) * im.shape[0] * 0.1)

    interp_str = FLAGS.image_interpolation_train
    antialias = FLAGS.antialias_train
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    warped_background_im = cameralib.reproject_image(
        background_im,
        cam,
        cam_new,
        im.shape,
        interp=interp,
        antialias_factor=antialias)
    return improc.blend_image(warped_background_im, im, fgmask)
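
Why the zoom factor above guarantees that the warped background covers the whole crop: scaling by max(out_h / bg_h, out_w / bg_w) makes both scaled background sides at least as large as the output, and the extra 1.2-1.5x margin leaves room for the random principal-point shift. A quick sanity check (shapes made up):

import numpy as np

out_shape, bg_shape = np.array([256, 256]), np.array([480, 640])
zoom = 1.2 * np.max(out_shape / bg_shape)
assert np.all(bg_shape * zoom >= out_shape)
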
Example #9
def load_and_transform3d(ex, joint_info, learning_phase, rng=None):
    appearance_rng = util.new_rng(rng)
    background_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    output_side = FLAGS.proc_side
    output_imshape = (output_side, output_side)

    box = ex.bbox
    if FLAGS.partial_visibility:
        box = util.random_partial_subbox(boxlib.expand_to_square(box), partial_visi_rng)

    crop_side = np.max(box[2:])
    center_point = boxlib.center(box)
    if ((learning_phase == TRAIN and FLAGS.geom_aug) or
            (learning_phase != TRAIN and FLAGS.test_aug and FLAGS.geom_aug)):
        center_point += util.random_uniform_disc(geom_rng) * FLAGS.shift_aug / 100 * crop_side

    if box[2] < box[3]:
        delta_y = np.array([0, box[3] / 2])
        sidepoints = center_point + np.stack([-delta_y, delta_y])
    else:
        delta_x = np.array([box[2] / 2, 0])
        sidepoints = center_point + np.stack([-delta_x, delta_x])

    cam = ex.camera.copy()
    cam.turn_towards(target_image_point=center_point)
    cam.undistort()
    cam.square_pixels()
    world_sidepoints = ex.camera.image_to_world(sidepoints)
    cam_sidepoints = cam.world_to_image(world_sidepoints)
    crop_side = np.linalg.norm(cam_sidepoints[0] - cam_sidepoints[1])
    cam.zoom(output_side / crop_side)
    cam.center_principal_point(output_imshape)

    if FLAGS.geom_aug and (learning_phase == TRAIN or FLAGS.test_aug):
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        r = FLAGS.rot_aug * np.pi / 180
        zoom = geom_rng.uniform(1 - s1, 1 + s2)
        cam.zoom(zoom)
        cam.rotate(roll=geom_rng.uniform(-r, r))

    world_coords = ex.univ_coords if FLAGS.universal_skeleton else ex.world_coords
    metric_world_coords = ex.world_coords

    if learning_phase == TRAIN and geom_rng.rand() < 0.5:
        cam.horizontal_flip()
        camcoords = cam.world_to_camera(world_coords)[joint_info.mirror_mapping]
        metric_world_coords = metric_world_coords[joint_info.mirror_mapping]
    else:
        camcoords = cam.world_to_camera(world_coords)

    imcoords = cam.world_to_image(metric_world_coords)

    image_path = util.ensure_absolute_path(ex.image_path)
    origsize_im = improc.imread_jpeg(image_path)

    interp_str = (FLAGS.image_interpolation_train
                  if learning_phase == TRAIN else FLAGS.image_interpolation_test)
    antialias = (FLAGS.antialias_train if learning_phase == TRAIN else FLAGS.antialias_test)
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(
        origsize_im, ex.camera, cam, output_imshape, antialias_factor=antialias, interp=interp)

    if re.match('.+/mupots/TS[1-5]/.+', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
    elif '3dhp' in ex.image_path and re.match('.+/(TS[1-4])/', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
        im = improc.white_balance(im, 110, 145)

    if (FLAGS.background_aug_prob and hasattr(ex, 'mask') and ex.mask is not None and
            background_rng.rand() < FLAGS.background_aug_prob and
            (learning_phase == TRAIN or FLAGS.test_aug)):
        fgmask = improc.decode_mask(ex.mask)
        fgmask = cameralib.reproject_image(
            fgmask, ex.camera, cam, output_imshape, antialias_factor=antialias, interp=interp)
        im = augmentation.background.augment_background(im, fgmask, background_rng)

    im = augmentation.appearance.augment_appearance(im, learning_phase, appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    # Joints with NaN coordinates are invalid
    is_joint_in_fov = ~np.logical_or(np.any(imcoords < 0, axis=-1),
                                     np.any(imcoords >= FLAGS.proc_side, axis=-1))
    joint_validity_mask = ~np.any(np.isnan(camcoords), axis=-1)

    rot_to_orig_cam = ex.camera.R @ cam.R.T
    rot_to_world = cam.R.T
    inv_intrinsics = np.linalg.inv(cam.intrinsic_matrix)

    return (
        ex.image_path, im, np.nan_to_num(camcoords).astype(np.float32),
        np.nan_to_num(imcoords).astype(np.float32), inv_intrinsics.astype(np.float32),
        rot_to_orig_cam.astype(np.float32), rot_to_world.astype(np.float32),
        cam.t.astype(np.float32), joint_validity_mask,
        np.float32(is_joint_in_fov), ex.activity_name, ex.scene_name)
Example #10
def load_and_transform2d(example, joint_info, learning_phase, rng):
    # Get the random number generators for the different augmentations to make the result reproducible
    appearance_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    # Load the image
    image_path = util.ensure_absolute_path(example.image_path)
    im_from_file = improc.imread_jpeg(image_path)

    # Determine bounding box
    bbox = example.bbox
    if FLAGS.partial_visibility:
        bbox = util.random_partial_subbox(boxlib.expand_to_square(bbox), partial_visi_rng)

    crop_side = np.max(bbox[2:])
    center_point = boxlib.center(bbox)
    orig_cam = cameralib.Camera.create2D(im_from_file.shape)
    cam = orig_cam.copy()
    cam.zoom(FLAGS.proc_side / crop_side)

    if FLAGS.geom_aug:
        center_point += util.random_uniform_disc(geom_rng) * FLAGS.shift_aug / 100 * crop_side
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        cam.zoom(geom_rng.uniform(1 - s1, 1 + s2))
        r = FLAGS.rot_aug * np.pi / 180
        cam.rotate(roll=geom_rng.uniform(-r, r))

    if FLAGS.geom_aug and geom_rng.rand() < 0.5:
        # Horizontal flipping
        cam.horizontal_flip()
        # Must also permute the joints to exchange e.g. left wrist and right wrist!
        imcoords = example.coords[joint_info.mirror_mapping]
    else:
        # Copy so that marking invalid joints below does not mutate the stored example
        imcoords = example.coords.copy()

    new_center_point = cameralib.reproject_image_points(center_point, orig_cam, cam)
    cam.shift_to_center(new_center_point, (FLAGS.proc_side, FLAGS.proc_side))

    is_annotation_invalid = (np.nan_to_num(imcoords[:, 1]) > im_from_file.shape[0] * 0.95)
    imcoords[is_annotation_invalid] = np.nan
    imcoords = cameralib.reproject_image_points(imcoords, orig_cam, cam)

    interp_str = (FLAGS.image_interpolation_train
                  if learning_phase == TRAIN else FLAGS.image_interpolation_test)
    antialias = (FLAGS.antialias_train if learning_phase == TRAIN else FLAGS.antialias_test)
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(
        im_from_file, orig_cam, cam, (FLAGS.proc_side, FLAGS.proc_side),
        antialias_factor=antialias, interp=interp)
    im = augmentation.appearance.augment_appearance(im, learning_phase, appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    joint_validity_mask = ~np.any(np.isnan(imcoords), axis=1)
    # We must eliminate NaNs because some TensorFlow ops can't deal with any NaNs touching them,
    # even if they would not influence the result. Therefore we use a separate "joint_validity_mask"
    # to indicate which joint coords are valid.
    imcoords = np.nan_to_num(imcoords)
    return example.image_path, np.float32(im), np.float32(imcoords), joint_validity_mask
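
Why the permutation via joint_info.mirror_mapping is needed at the horizontal flip above: after mirroring the image, the point that used to be the left wrist is where a right wrist would appear, so the coordinate rows must be swapped accordingly. A tiny illustration (joint names and the mapping are made up for this example):

import numpy as np

joint_names = ['head', 'lwri', 'rwri', 'lank', 'rank']
# mirror_mapping[i] = index of the joint whose coords joint i should take after a flip
mirror_mapping = np.array([0, 2, 1, 4, 3])

coords = np.array([[10., 5.], [3., 20.], [17., 20.], [4., 40.], [16., 40.]])
flipped_coords = coords[mirror_mapping]  # the 'lwri' row now holds the old 'rwri' coords
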
Example #11
def load_and_transform3d(ex, joint_info, learning_phase, rng):
    # Get the random number generators for the different augmentations to make the result reproducible
    appearance_rng = util.new_rng(rng)
    background_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    output_side = FLAGS.proc_side
    output_imshape = (output_side, output_side)

    if 'sailvos' in ex.image_path.lower():
        # This is needed in order not to lose precision in later operations.
        # Background: In the Sailvos dataset (GTA V), some world coordinates
        # are crazy large (several kilometers, i.e. millions of millimeters, which becomes
        # hard to process with the limited simultaneous dynamic range of float32).
        # They are stored in float64 but the processing is done in float32 here.
        ex.world_coords -= ex.camera.t
        ex.camera.t[:] = 0

    box = ex.bbox
    if 'surreal' in ex.image_path.lower():
        # Surreal images are flipped wrong in the official dataset release
        box = box.copy()
        box[0] = 320 - (box[0] + box[2])

    # Partial visibility
    if 'surreal' in ex.image_path.lower() and 'surmuco' not in FLAGS.dataset:
        partial_visi_prob = 0.5
    elif 'h36m' in ex.image_path.lower() and 'many' in FLAGS.dataset:
        partial_visi_prob = 0.5
    else:
        partial_visi_prob = FLAGS.partial_visibility_prob

    use_partial_visi_aug = ((learning_phase == TRAIN or FLAGS.test_aug)
                            and partial_visi_rng.rand() < partial_visi_prob)
    if use_partial_visi_aug:
        box = util.random_partial_subbox(boxlib.expand_to_square(box),
                                         partial_visi_rng)

    # Geometric transformation and augmentation
    crop_side = np.max(box[2:])
    center_point = boxlib.center(box)
    if ((learning_phase == TRAIN and FLAGS.geom_aug) or
        (learning_phase != TRAIN and FLAGS.test_aug and FLAGS.geom_aug)):
        center_point += util.random_uniform_disc(
            geom_rng) * FLAGS.shift_aug / 100 * crop_side

    # The homographic reprojection of a rectangle (bounding box) will not be another rectangle
    # Hence, instead we transform the side midpoints of the short sides of the box and
    # determine an appropriate zoom factor by taking the projected distance of these two points
    # and scaling that to the desired output image side length.
    if box[2] < box[3]:
        # Tall box: take midpoints of top and bottom sides
        delta_y = np.array([0, box[3] / 2])
        sidepoints = center_point + np.stack([-delta_y, delta_y])
    else:
        # Wide box: take midpoints of left and right sides
        delta_x = np.array([box[2] / 2, 0])
        sidepoints = center_point + np.stack([-delta_x, delta_x])

    cam = ex.camera.copy()
    cam.turn_towards(target_image_point=center_point)
    cam.undistort()
    cam.square_pixels()
    cam_sidepoints = cameralib.reproject_image_points(sidepoints, ex.camera,
                                                      cam)
    crop_side = np.linalg.norm(cam_sidepoints[0] - cam_sidepoints[1])
    cam.zoom(output_side / crop_side)
    cam.center_principal_point(output_imshape)

    if FLAGS.geom_aug and (learning_phase == TRAIN or FLAGS.test_aug):
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        zoom = geom_rng.uniform(1 - s1, 1 + s2)
        cam.zoom(zoom)
        r = np.deg2rad(FLAGS.rot_aug)
        cam.rotate(roll=geom_rng.uniform(-r, r))

    world_coords = ex.univ_coords if FLAGS.universal_skeleton else ex.world_coords
    metric_world_coords = ex.world_coords

    if learning_phase == TRAIN and geom_rng.rand() < 0.5:
        cam.horizontal_flip()
        # Must reorder the joints due to left and right flip
        camcoords = cam.world_to_camera(world_coords)[
            joint_info.mirror_mapping]
        metric_world_coords = metric_world_coords[joint_info.mirror_mapping]
    else:
        camcoords = cam.world_to_camera(world_coords)

    imcoords = cam.world_to_image(metric_world_coords)

    # Load and reproject image
    image_path = util.ensure_absolute_path(ex.image_path)
    origsize_im = improc.imread_jpeg(image_path)
    if 'surreal' in ex.image_path.lower():
        # Surreal images are flipped wrong in the official dataset release
        origsize_im = origsize_im[:, ::-1]

    interp_str = (FLAGS.image_interpolation_train if learning_phase == TRAIN
                  else FLAGS.image_interpolation_test)
    antialias = (FLAGS.antialias_train
                 if learning_phase == TRAIN else FLAGS.antialias_test)
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(origsize_im,
                                   ex.camera,
                                   cam,
                                   output_imshape,
                                   antialias_factor=antialias,
                                   interp=interp)

    # Color adjustment
    if re.match('.*mupots/TS[1-5]/.+', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
    elif '3dhp' in ex.image_path and re.match('.+/(TS[1-4])/', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
        im = improc.white_balance(im, 110, 145)
    elif 'panoptic' in ex.image_path.lower():
        im = improc.white_balance(im, 120, 138)

    # Background augmentation
    if hasattr(ex, 'mask') and ex.mask is not None:
        bg_aug_prob = (0.2 if 'sailvos' in ex.image_path.lower()
                       else FLAGS.background_aug_prob)
        if (FLAGS.background_aug_prob
                and (learning_phase == TRAIN or FLAGS.test_aug)
                and background_rng.rand() < bg_aug_prob):
            fgmask = improc.decode_mask(ex.mask)
            if 'surreal' in ex.image_path:
                # Surreal images are flipped wrong in the official dataset release
                fgmask = fgmask[:, ::-1]
            fgmask = cameralib.reproject_image(fgmask,
                                               ex.camera,
                                               cam,
                                               output_imshape,
                                               antialias_factor=antialias,
                                               interp=interp)
            im = augmentation.background.augment_background(
                im, fgmask, background_rng)

    # Occlusion and color augmentation
    im = augmentation.appearance.augment_appearance(im, learning_phase,
                                                    FLAGS.occlude_aug_prob,
                                                    appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    # Joints with NaN coordinates are invalid
    is_joint_in_fov = ~np.logical_or(
        np.any(imcoords < 0, axis=-1),
        np.any(imcoords >= FLAGS.proc_side, axis=-1))
    joint_validity_mask = ~np.any(np.isnan(camcoords), axis=-1)

    rot_to_orig_cam = ex.camera.R @ cam.R.T
    rot_to_world = cam.R.T

    return dict(image=im,
                intrinsics=np.float32(cam.intrinsic_matrix),
                image_path=ex.image_path,
                coords3d_true=np.nan_to_num(camcoords).astype(np.float32),
                coords2d_true=np.nan_to_num(imcoords).astype(np.float32),
                rot_to_orig_cam=rot_to_orig_cam.astype(np.float32),
                rot_to_world=rot_to_world.astype(np.float32),
                cam_loc=cam.t.astype(np.float32),
                joint_validity_mask=joint_validity_mask,
                is_joint_in_fov=np.float32(is_joint_in_fov))
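
The side-midpoint trick above, in isolation: under a homography a rectangle maps to a general quadrilateral, so instead of warping the box itself, the code measures how far apart the midpoints of the box's short sides land and zooms so that this distance fills the output side. A minimal sketch with an arbitrary projective warp (all values made up):

import cv2
import numpy as np

H = np.array([[1.1, 0.05, -30.0],
              [0.02, 0.95, 10.0],
              [1e-4, 2e-5, 1.0]])  # some homography
box = np.array([100.0, 50.0, 80.0, 200.0])  # x, y, w, h (tall box)
center = box[:2] + box[2:] / 2
delta_y = np.array([0.0, box[3] / 2])
sidepoints = np.stack([center - delta_y, center + delta_y])

warped = cv2.perspectiveTransform(sidepoints.reshape(1, -1, 2), H)[0]
crop_side = np.linalg.norm(warped[0] - warped[1])
zoom = 256 / crop_side  # scale factor so the subject's extent fills a 256 px crop
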
Example #12
def make_efficient_example(ex,
                           new_image_path,
                           further_expansion_factor=1,
                           image_adjustments_3dhp=False,
                           min_time=None):
    """Make example by storing the image in a cropped and resized version for efficient loading"""

    is3d = hasattr(ex, 'world_coords')
    w, h = (improc.image_extents(util.ensure_absolute_path(ex.image_path))
            if isinstance(ex.image_path, str) else
            (ex.image_path.shape[1], ex.image_path.shape[0]))
    full_box = boxlib.full_box(imsize=[w, h])

    if is3d:
        old_camera = ex.camera
        new_camera = ex.camera.copy()
        new_camera.turn_towards(target_image_point=boxlib.center(ex.bbox))
        new_camera.undistort()
    else:
        old_camera = cameralib.Camera.create2D()
        new_camera = old_camera.copy()

    reprojected_box = reproject_box(ex.bbox,
                                    old_camera,
                                    new_camera,
                                    method='side_midpoints')
    reprojected_full_box = reproject_box(full_box,
                                         old_camera,
                                         new_camera,
                                         method='corners')
    expanded_bbox = (get_expanded_crop_box(
        reprojected_box, reprojected_full_box, further_expansion_factor)
                     if further_expansion_factor > 0 else reprojected_box)
    scale_factor = min(1.2, 256 / np.max(reprojected_box[2:]) * 1.5)
    new_camera.shift_image(-expanded_bbox[:2])
    new_camera.scale_output(scale_factor)

    reprojected_box = reproject_box(ex.bbox,
                                    old_camera,
                                    new_camera,
                                    method='side_midpoints')
    dst_shape = improc.rounded_int_tuple(scale_factor * expanded_bbox[[3, 2]])

    new_image_abspath = util.ensure_absolute_path(new_image_path)
    if not (util.is_file_newer(new_image_abspath, min_time)
            and improc.is_image_readable(new_image_abspath)):
        im = improc.imread_jpeg(ex.image_path) if isinstance(
            ex.image_path, str) else ex.image_path
        im = np.power(im.astype(np.float32) / 255, 2.2)  # decode gamma to linear light
        new_im = cameralib.reproject_image(im,
                                           old_camera,
                                           new_camera,
                                           dst_shape,
                                           antialias_factor=2,
                                           interp=cv2.INTER_CUBIC)
        new_im = np.clip(new_im, 0, 1)

        if image_adjustments_3dhp:
            # enhance the 3dhp images to reduce the green tint and increase brightness
            new_im = (new_im**(1 / 2.2 * 0.67) * 255).astype(np.uint8)
            new_im = improc.white_balance(new_im, 110, 145)
        else:
            new_im = (new_im**(1 / 2.2) * 255).astype(np.uint8)
        util.ensure_path_exists(new_image_abspath)
        imageio.imwrite(new_image_abspath, new_im, quality=95)
        assert improc.is_image_readable(new_image_abspath)

    new_ex = copy.deepcopy(ex)
    new_ex.bbox = reprojected_box
    new_ex.image_path = new_image_path
    if is3d:
        new_ex.camera = new_camera
    else:
        new_ex.coords = cameralib.reproject_image_points(
            new_ex.coords, old_camera, new_camera)

    if hasattr(ex, 'mask') and ex.mask is not None:
        if isinstance(ex.mask, str):
            mask = improc.imread_jpeg(util.ensure_absolute_path(ex.mask))
            host_mask, cuda_mask = get_memory(mask.shape)
            np.divide(mask.astype(np.float32), 255, out=host_mask)
            cuda_mask.upload(host_mask)
            mask_reproj = cameralib.reproject_image(
                cuda_mask,
                ex.camera,
                new_camera,
                dst_shape,
                antialias_factor=2).download()
            mask_reproj = 255 * (mask_reproj[..., 0] > 32 / 255).astype(
                np.uint8)
            new_ex.mask = get_connected_component_with_highest_iou(
                mask_reproj, reprojected_box)
        else:
            new_ex.mask = ex.mask
    return new_ex
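
The gamma handling above (raising to the power 2.2 before warping, back to 1/2.2 afterwards) amounts to resampling in linear light: averaging gamma-encoded pixel values makes downscaled edges too dark and produces halos. The same idea as a standalone helper (the name resize_linear_light is hypothetical):

import cv2
import numpy as np

def resize_linear_light(im_uint8, factor):
    linear = np.power(im_uint8.astype(np.float32) / 255, 2.2)  # decode gamma
    resized = cv2.resize(linear, None, fx=factor, fy=factor,
                         interpolation=cv2.INTER_AREA)
    return (np.clip(resized, 0, 1) ** (1 / 2.2) * 255).astype(np.uint8)
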
Example #13
def make_efficient_example(ex):
    image_relpath = ex.image_path
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.2
    base_dst_side = 256

    box_center = boxlib.center(ex.bbox)
    s, c = np.sin(max_rotate), np.cos(max_rotate)
    w, h = ex.bbox[2:]
    rot_bbox_side = max(c * w + s * h, c * h + s * w)
    rot_bbox = boxlib.box_around(box_center, rot_bbox_side)

    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor,
                       1)
    expansion_factor = padding_factor * shift_factor * scale_down_factor
    expanded_bbox = boxlib.expand(rot_bbox, expansion_factor)
    expanded_bbox = boxlib.intersect(expanded_bbox,
                                     np.array([0, 0, 2048, 2048]))

    new_camera = ex.camera.copy()
    new_camera.intrinsic_matrix[:2, 2] -= expanded_bbox[:2]
    new_camera.scale_output(scale_factor)
    new_camera.undistort()
    dst_shape = improc.rounded_int_tuple(scale_factor * expanded_bbox[[3, 2]])

    new_im_relpath = ex.image_path.replace('3dhp', '3dhp_downscaled')
    new_im_path = os.path.join(paths.DATA_ROOT, new_im_relpath)
    if not (util.is_file_newer(new_im_path, "2019-11-14T23:32:07")
            and improc.is_image_readable(new_im_path)):
        im = improc.imread_jpeg(f'{paths.DATA_ROOT}/{image_relpath}')
        new_im = cameralib.reproject_image(im, ex.camera, new_camera,
                                           dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, new_im)

    new_bbox_topleft = cameralib.reproject_image_points(
        ex.bbox[:2], ex.camera, new_camera)
    new_bbox = np.concatenate([new_bbox_topleft, ex.bbox[2:] * scale_factor])

    mask_rle_relpath = new_im_relpath.replace('Images', 'FGmaskImages').replace(
        '.jpg', '.pkl')
    mask_rle_path = os.path.join(paths.DATA_ROOT, mask_rle_relpath)
    if util.is_file_newer(mask_rle_path, "2020-03-11T20:46:46"):
        mask_runlength = util.load_pickle(mask_rle_path)
    else:
        mask_relpath = ex.image_path.replace('Images', 'FGmaskImages').replace(
            '.jpg', '.png')
        mask = imageio.imread(os.path.join(paths.DATA_ROOT, mask_relpath))
        mask_reproj = cameralib.reproject_image(mask, ex.camera, new_camera,
                                                dst_shape)
        mask_runlength = get_mask_with_highest_iou(mask_reproj, new_bbox)
        util.dump_pickle(mask_runlength, mask_rle_path)

    return p3ds.Pose3DExample(new_im_relpath,
                              ex.world_coords,
                              new_bbox,
                              new_camera,
                              mask=mask_runlength,
                              univ_coords=ex.univ_coords)
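
improc.encode_mask / improc.decode_mask are internal helpers whose on-disk format is not shown in these examples; all they need to provide is a compact, losslessly invertible encoding of a binary mask. A minimal run-length coding in that spirit (this is an assumption, not the actual format):

import numpy as np

def encode_mask_rle(mask):
    mask = np.asarray(mask, np.uint8)  # assumes a binary 0/1 mask
    flat = mask.ravel()
    changes = np.flatnonzero(np.diff(flat)) + 1  # indices where the value flips
    starts = np.concatenate([[0], changes])
    ends = np.concatenate([changes, [flat.size]])
    return dict(shape=mask.shape, first=int(flat[0]), runlengths=ends - starts)

def decode_mask_rle(enc):
    # Runs alternate between `first` and its complement
    values = (np.arange(len(enc['runlengths'])) + enc['first']) % 2
    return np.repeat(values, enc['runlengths']).astype(np.uint8).reshape(enc['shape'])
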
Example #14
def get_composite_image(i_sample):
    s = f'{i_sample + 1:06d}'
    return improc.imread_jpeg(
        f'{paths.DATA_ROOT}/muco/unaugmented_set_001/{s[:2]}/{s[:4]}/{s}.jpg')
Example #15
def get_image(i_subj, i_seq, i_cam, i_frame):
    return improc.imread_jpeg(
        f'{paths.DATA_ROOT}/3dhp/S{i_subj}/Seq{i_seq}/imageSequence/img_{i_cam}_{i_frame:06d}.jpg'
    )