def make_efficient_example(ex, root_muco, i_person):
    """Store a cropped, downscaled copy of the example's image and return a matching example.

    The crop is larger than the person box so that later rotation, shift and scale
    augmentations still find image content around the person.

    Args:
        ex: source example; this function reads its ``image_path``, ``bbox``, ``camera``,
            ``mask``, ``world_coords`` and ``univ_coords`` attributes.
        root_muco: root directory of the dataset; source images and masks are read from
            below it and the output goes to ``{root_muco}_downscaled``.
        i_person: integer index appended to the output file name.

    Returns:
        A ``p3ds.Pose3DExample`` that refers to the new image, with the adjusted box,
        the adjusted camera and the re-encoded mask (or None).
    """
    image_relpath = ex.image_path

    # Margins reserved for the augmentations applied later
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.2
    base_dst_side = 256

    # Side of the square that still contains the box after rotating by up to max_rotate
    sin_r = np.sin(max_rotate)
    cos_r = np.cos(max_rotate)
    extent_matrix = np.array([[cos_r, sin_r], [sin_r, cos_r]])
    rotated_extents = (extent_matrix @ ex.bbox[2:, np.newaxis])[:, 0]
    square_side = np.max(rotated_extents)
    rot_bbox = boxlib.box_around(
        boxlib.center(ex.bbox), np.array([square_side, square_side]))

    # Never upscale: the factor is capped at 1
    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor, 1)
    expansion_factor = padding_factor * shift_factor * scale_down_factor
    crop_box = boxlib.intersect(
        boxlib.expand(rot_bbox, expansion_factor), boxlib.full_box([2048, 2048]))

    # Camera describing the cropped and rescaled image
    new_camera = ex.camera.copy()
    new_camera.intrinsic_matrix[:2, 2] -= crop_box[:2]
    new_camera.scale_output(scale_factor)
    new_camera.undistort()
    dst_shape = improc.rounded_int_tuple(scale_factor * crop_box[[3, 2]])

    new_im_path = f'{root_muco}_downscaled/{image_relpath[:-4]}_{i_person:01d}.jpg'
    if not util.is_file_newer(new_im_path, "2020-02-15T23:28:26"):
        source_im = improc.imread_jpeg(f'{root_muco}/{image_relpath}')
        cropped_im = cameralib.reproject_image(
            source_im, ex.camera, new_camera, dst_shape, antialias_factor=4)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, cropped_im, quality=95)

    topleft = cameralib.reproject_image_points(ex.bbox[:2], ex.camera, new_camera)
    new_bbox = np.concatenate([topleft, ex.bbox[2:] * scale_factor])

    # Fall back to the pickled mask on disk when the example carries none
    mask = ex.mask
    if mask is None:
        noext = os.path.splitext(image_relpath[:-4])[0]
        noext = noext.replace('unaugmented_set_001/', '')
        mask = improc.decode_mask(util.load_pickle(f'{root_muco}/masks/{noext}.pkl'))

    new_mask_encoded = None
    if mask is not False:
        warped_mask = cameralib.reproject_image(mask, ex.camera, new_camera, dst_shape)
        new_mask_encoded = improc.encode_mask(warped_mask)

    return p3ds.Pose3DExample(
        os.path.relpath(new_im_path, paths.DATA_ROOT),
        ex.world_coords.astype(np.float32),
        new_bbox.astype(np.float32),
        new_camera,
        mask=new_mask_encoded,
        univ_coords=ex.univ_coords.astype(np.float32))
def make_efficient_example(ex, further_expansion_factor=1, further_scale_up=1, dir_suffix=''):
    """Write a cropped, downscaled version of the example's image for faster loading.

    Args:
        ex: source example; its ``image_path``, ``bbox``, ``camera``, ``world_coords``
            and ``activity_name`` attributes are read.
        further_expansion_factor: extra multiplier on the crop box expansion.
        further_scale_up: extra multiplier on the scale factor (before capping at 1).
        dir_suffix: appended to the ``h36m_downscaled`` directory name.

    Returns:
        A new ``ps3d.Pose3DExample`` pointing at the downscaled image.
    """
    # The stored area is larger than the tight crop, to leave room for the
    # geometric augmentations (rotation, padding, scaling, shifting).
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85 * further_scale_up
    scale_down_factor = 1 / 0.85
    shift_factor = 1.1
    base_dst_side = 256

    # Square side that covers the box under rotations of up to max_rotate
    s, c = np.sin(max_rotate), np.cos(max_rotate)
    w, h = ex.bbox[2:]
    rot_bbox_side = max(c * w + s * h, c * h + s * w)
    rot_bbox = boxlib.box_around(boxlib.center(ex.bbox), rot_bbox_side)

    # Capped at 1 so that the image is never enlarged
    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor, 1)
    expansion_factor = (
        padding_factor * shift_factor * scale_down_factor * further_expansion_factor)
    crop_box = boxlib.intersect(
        boxlib.expand(rot_bbox, expansion_factor), np.array([0, 0, 1000, 1000]))

    # Camera of the cropped and rescaled image
    new_camera = copy.deepcopy(ex.camera)
    new_camera.intrinsic_matrix[:2, 2] -= crop_box[:2]
    new_camera.scale_output(scale_factor)
    new_camera.undistort()

    new_im_relpath = ex.image_path.replace('h36m', f'h36m_downscaled{dir_suffix}')
    new_im_path = f'{paths.DATA_ROOT}/{new_im_relpath}'

    # Regenerate unless a sufficiently recent, readable file already exists
    is_up_to_date = (
        util.is_file_newer(new_im_path, "2019-11-14T23:33:14")
        and improc.is_image_readable(new_im_path))
    if not is_up_to_date:
        source_im = improc.imread_jpeg(ex.image_path)
        dst_shape = improc.rounded_int_tuple(scale_factor * crop_box[[3, 2]])
        cropped_im = cameralib.reproject_image(source_im, ex.camera, new_camera, dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, cropped_im)

    topleft = cameralib.reproject_image_points(ex.bbox[:2], ex.camera, new_camera)
    new_bbox = np.concatenate([topleft, ex.bbox[2:] * scale_factor])
    return ps3d.Pose3DExample(
        new_im_relpath, ex.world_coords, new_bbox, new_camera,
        activity_name=ex.activity_name)
def augment_background(im, fgmask, rng):
    """Blend `im` over a randomly chosen, randomly zoomed and shifted background image.

    Args:
        im: foreground image; its shape determines the output shape.
        fgmask: foreground mask handed to ``improc.blend_image``.
        rng: random number generator used for all random choices here.

    Returns:
        The result of ``improc.blend_image`` on the warped background, `im` and `fgmask`.
    """
    bg_path = util.choice(get_inria_holiday_background_paths(), rng)
    bg_im = improc.imread_jpeg(bg_path)

    src_cam = cameralib.Camera.create2D(bg_im.shape)
    dst_cam = src_cam.copy()

    # Zoom by the larger of the two side ratios, times a random extra factor
    extra_zoom = rng.uniform(1.2, 1.5)
    side_ratios = im.shape[:2] / np.asarray(bg_im.shape[:2])
    dst_cam.zoom(extra_zoom * np.max(side_ratios))
    dst_cam.center_principal_point(im.shape)

    # Random shift, proportional to the image height
    random_shift = util.random_uniform_disc(rng) * im.shape[0] * 0.1
    dst_cam.shift_image(random_shift)

    warped_bg = cameralib.reproject_image(bg_im, src_cam, dst_cam, im.shape)
    return improc.blend_image(warped_bg, im, fgmask)
def make_efficient_example(ex, rect_id):
    """Write a cropped, downscaled version of a 2D example's image for faster loading.

    Args:
        ex: source example; its ``image_path``, ``bbox`` and ``coords`` are read.
        rect_id: integer appended (two digits) to the output file name.

    Returns:
        A new ``Pose2DExample`` with the image path relative to ``paths.DATA_ROOT``,
        and with coordinates and box expressed in the new image.
    """
    # The stored area must leave room for rotation, the usual padding around the
    # box, scale (shrink) augmentation and shifting.
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.1
    max_rotate = np.pi / 6
    rot_factor = np.sin(max_rotate) + np.cos(max_rotate)
    base_dst_side = 256
    hopeful_factor = 0.9

    # Scale is based on the box height (bbox[3]) and capped at 1 (no upscaling)
    scale_factor = min(base_dst_side / ex.bbox[3] * scale_up_factor, 1)
    expansion_factor = (
        rot_factor * padding_factor * shift_factor * scale_down_factor * hopeful_factor)

    # Expanded square crop, clipped to the image area
    square_box = boxlib.expand_to_square(ex.bbox)
    imsize = improc.image_extents(ex.image_path)
    image_box = np.array([0, 0, imsize[0], imsize[1]])
    crop_box = boxlib.intersect(boxlib.expand(square_box, expansion_factor), image_box)

    # Express cropping and scaling as a change of 2D camera
    old_camera = cameralib.Camera.create2D()
    new_camera = old_camera.copy()
    new_camera.shift_image(-crop_box[:2])
    new_camera.scale_output(scale_factor)
    dst_shape = improc.rounded_int_tuple(scale_factor * crop_box[[3, 2]])

    without_ext, ext = os.path.splitext(ex.image_path.replace('mpii', 'mpii_downscaled'))
    new_im_path = f'{without_ext}_{rect_id:02d}{ext}'

    # Regenerate unless a sufficiently recent, readable file already exists
    is_up_to_date = (
        util.is_file_newer(new_im_path, "2019-11-12T17:54:06")
        and improc.is_image_readable(new_im_path))
    if not is_up_to_date:
        source_im = improc.imread_jpeg(ex.image_path)
        cropped_im = cameralib.reproject_image(source_im, old_camera, new_camera, dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, cropped_im)

    topleft = cameralib.reproject_image_points(ex.bbox[:2], old_camera, new_camera)
    new_bbox = np.concatenate([topleft, ex.bbox[2:] * scale_factor])
    new_coords = cameralib.reproject_image_points(ex.coords, old_camera, new_camera)
    return Pose2DExample(
        os.path.relpath(new_im_path, paths.DATA_ROOT), new_coords, bbox=new_bbox)
def augment_background(im, fgmask, rng):
    """Blend `im` over a randomly chosen, randomly zoomed and shifted background image.

    The background is warped with the training-time interpolation and antialiasing
    settings taken from ``FLAGS``.

    Args:
        im: foreground image; its shape determines the output shape.
        fgmask: foreground mask handed to ``improc.blend_image``.
        rng: random number generator used for all random choices here.

    Returns:
        The result of ``improc.blend_image`` on the warped background, `im` and `fgmask`.
    """
    bg_path = util.choice(get_inria_holiday_background_paths(), rng)
    bg_im = improc.imread_jpeg(bg_path)

    src_cam = cameralib.Camera.create2D(bg_im.shape)
    dst_cam = src_cam.copy()

    # Zoom by the larger of the two side ratios, times a random extra factor
    extra_zoom = rng.uniform(1.2, 1.5)
    side_ratios = im.shape[:2] / np.asarray(bg_im.shape[:2])
    dst_cam.zoom(extra_zoom * np.max(side_ratios))
    dst_cam.center_principal_point(im.shape)

    # Random shift, proportional to the image height
    random_shift = util.random_uniform_disc(rng) * im.shape[0] * 0.1
    dst_cam.shift_image(random_shift)

    # Look up the OpenCV interpolation constant named by the flag
    interp = getattr(cv2, 'INTER_' + FLAGS.image_interpolation_train.upper())
    warped_bg = cameralib.reproject_image(
        bg_im, src_cam, dst_cam, im.shape,
        interp=interp, antialias_factor=FLAGS.antialias_train)
    return improc.blend_image(warped_bg, im, fgmask)
def load_and_transform3d(ex, joint_info, learning_phase, rng=None):
    """Load a 3D-annotated example, crop and augment it, and return the training tuple.

    Args:
        ex: example with ``bbox``, ``camera``, ``image_path``, ``world_coords``,
            ``univ_coords``, ``activity_name``, ``scene_name`` and optionally ``mask``.
        joint_info: provides ``mirror_mapping`` for reordering joints after a flip.
        learning_phase: compared against ``TRAIN`` to decide which augmentations apply.
        rng: parent random number generator from which per-purpose generators are derived.

    Returns:
        A 12-tuple: image path, image, camera-space coords, image-space coords, inverse
        intrinsics, rotation to the original camera, rotation to world, camera location,
        joint validity mask, in-field-of-view indicator, activity name, scene name.
    """
    # One generator per augmentation type; the creation order is part of the behavior
    appearance_rng = util.new_rng(rng)
    background_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    is_training = learning_phase == TRAIN
    use_geom_aug = bool(FLAGS.geom_aug and (is_training or FLAGS.test_aug))

    output_side = FLAGS.proc_side
    output_imshape = (output_side, output_side)

    box = ex.bbox
    if FLAGS.partial_visibility:
        box = util.random_partial_subbox(boxlib.expand_to_square(box), partial_visi_rng)

    crop_side = np.max(box[2:])
    center_point = boxlib.center(box)
    if use_geom_aug:
        center_point += util.random_uniform_disc(geom_rng) * FLAGS.shift_aug / 100 * crop_side

    # Midpoints of the two short sides of the box (top/bottom for a tall box,
    # left/right otherwise); their reprojected distance defines the zoom.
    if box[2] < box[3]:
        half_extent = np.array([0, box[3] / 2])
    else:
        half_extent = np.array([box[2] / 2, 0])
    sidepoints = center_point + np.stack([-half_extent, half_extent])

    cam = ex.camera.copy()
    cam.turn_towards(target_image_point=center_point)
    cam.undistort()
    cam.square_pixels()
    cam_sidepoints = cam.world_to_image(ex.camera.image_to_world(sidepoints))
    crop_side = np.linalg.norm(cam_sidepoints[0] - cam_sidepoints[1])
    cam.zoom(output_side / crop_side)
    cam.center_principal_point(output_imshape)

    if use_geom_aug:
        shrink = FLAGS.scale_aug_down / 100
        grow = FLAGS.scale_aug_up / 100
        max_roll = FLAGS.rot_aug * np.pi / 180
        cam.zoom(geom_rng.uniform(1 - shrink, 1 + grow))
        cam.rotate(roll=geom_rng.uniform(-max_roll, max_roll))

    world_coords = ex.univ_coords if FLAGS.universal_skeleton else ex.world_coords
    metric_world_coords = ex.world_coords

    # Random horizontal flip during training; joints are reordered to match
    if is_training and geom_rng.rand() < 0.5:
        cam.horizontal_flip()
        camcoords = cam.world_to_camera(world_coords)[joint_info.mirror_mapping]
        metric_world_coords = metric_world_coords[joint_info.mirror_mapping]
    else:
        camcoords = cam.world_to_camera(world_coords)
    imcoords = cam.world_to_image(metric_world_coords)

    # Load the image and warp it into the new camera
    origsize_im = improc.imread_jpeg(util.ensure_absolute_path(ex.image_path))
    if is_training:
        interp_str = FLAGS.image_interpolation_train
        antialias = FLAGS.antialias_train
    else:
        interp_str = FLAGS.image_interpolation_test
        antialias = FLAGS.antialias_test
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(
        origsize_im, ex.camera, cam, output_imshape,
        antialias_factor=antialias, interp=interp)

    # Dataset-specific color adjustment, selected by path pattern
    if re.match('.+/mupots/TS[1-5]/.+', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
    elif '3dhp' in ex.image_path and re.match('.+/(TS[1-4])/', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
        im = improc.white_balance(im, 110, 145)

    # Background augmentation; the conditions are evaluated in this order so that
    # the random draw only happens when a mask is available.
    do_background_aug = (
        FLAGS.background_aug_prob
        and hasattr(ex, 'mask')
        and ex.mask is not None
        and background_rng.rand() < FLAGS.background_aug_prob
        and (is_training or FLAGS.test_aug))
    if do_background_aug:
        fgmask = cameralib.reproject_image(
            improc.decode_mask(ex.mask), ex.camera, cam, output_imshape,
            antialias_factor=antialias, interp=interp)
        im = augmentation.background.augment_background(im, fgmask, background_rng)

    im = augmentation.appearance.augment_appearance(im, learning_phase, appearance_rng)
    im = improc.normalize01(tfu.nhwc_to_std(im))

    # A joint is in the field of view if no image coordinate is negative or >= proc_side
    is_outside_low = np.any(imcoords < 0, axis=-1)
    is_outside_high = np.any(imcoords >= FLAGS.proc_side, axis=-1)
    is_joint_in_fov = ~np.logical_or(is_outside_low, is_outside_high)
    # Joints with NaN coordinates are invalid
    joint_validity_mask = ~np.any(np.isnan(camcoords), axis=-1)

    rot_to_orig_cam = ex.camera.R @ cam.R.T
    rot_to_world = cam.R.T
    inv_intrinsics = np.linalg.inv(cam.intrinsic_matrix)

    return (
        ex.image_path,
        im,
        np.nan_to_num(camcoords).astype(np.float32),
        np.nan_to_num(imcoords).astype(np.float32),
        inv_intrinsics.astype(np.float32),
        rot_to_orig_cam.astype(np.float32),
        rot_to_world.astype(np.float32),
        cam.t.astype(np.float32),
        joint_validity_mask,
        np.float32(is_joint_in_fov),
        ex.activity_name,
        ex.scene_name)
def load_and_transform2d(example, joint_info, learning_phase, rng):
    """Load a 2D-annotated example, crop and augment it, and return image and joint targets.

    Args:
        example: example with ``image_path``, ``bbox`` and ``coords`` attributes. It is
            not modified by this function.
        joint_info: provides ``mirror_mapping`` for reordering joints after a flip.
        learning_phase: selects train or test interpolation/antialiasing settings and is
            forwarded to the appearance augmentation.
        rng: parent random number generator from which per-purpose generators are derived.

    Returns:
        A tuple ``(image_path, image, imcoords, joint_validity_mask)`` where NaN
        coordinates have been replaced by zeros and flagged invalid in the mask.
    """
    # Get the random number generators for the different augmentations to make it reproducible
    appearance_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    # Load the image
    image_path = util.ensure_absolute_path(example.image_path)
    im_from_file = improc.imread_jpeg(image_path)

    # Determine bounding box
    bbox = example.bbox
    if FLAGS.partial_visibility:
        bbox = util.random_partial_subbox(boxlib.expand_to_square(bbox), partial_visi_rng)

    # The crop side is the larger box dimension. Only the size entries (bbox[2:])
    # may take part in the max; the first two entries are the box position.
    crop_side = np.max(bbox[2:])
    center_point = boxlib.center(bbox)
    orig_cam = cameralib.Camera.create2D(im_from_file.shape)
    cam = orig_cam.copy()
    cam.zoom(FLAGS.proc_side / crop_side)

    if FLAGS.geom_aug:
        center_point += util.random_uniform_disc(geom_rng) * FLAGS.shift_aug / 100 * crop_side
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        cam.zoom(geom_rng.uniform(1 - s1, 1 + s2))
        r = FLAGS.rot_aug * np.pi / 180
        cam.rotate(roll=geom_rng.uniform(-r, r))

    if FLAGS.geom_aug and geom_rng.rand() < 0.5:
        # Horizontal flipping
        cam.horizontal_flip()
        # Must also permute the joints to exchange e.g. left wrist and right wrist!
        imcoords = example.coords[joint_info.mirror_mapping]
    else:
        imcoords = example.coords
    # Work on a private copy: the NaN assignment below must not write into the
    # caller's example, which may be reused for later samples.
    imcoords = imcoords.copy()

    new_center_point = cameralib.reproject_image_points(center_point, orig_cam, cam)
    cam.shift_to_center(new_center_point, (FLAGS.proc_side, FLAGS.proc_side))

    # Annotations in the bottom 5% of the source image are treated as invalid
    is_annotation_invalid = (np.nan_to_num(imcoords[:, 1]) > im_from_file.shape[0] * 0.95)
    imcoords[is_annotation_invalid] = np.nan
    imcoords = cameralib.reproject_image_points(imcoords, orig_cam, cam)

    interp_str = (FLAGS.image_interpolation_train
                  if learning_phase == TRAIN else FLAGS.image_interpolation_test)
    antialias = (FLAGS.antialias_train if learning_phase == TRAIN else FLAGS.antialias_test)
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(
        im_from_file, orig_cam, cam, (FLAGS.proc_side, FLAGS.proc_side),
        antialias_factor=antialias, interp=interp)
    im = augmentation.appearance.augment_appearance(im, learning_phase, appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    joint_validity_mask = ~np.any(np.isnan(imcoords), axis=1)
    # We must eliminate NaNs because some TensorFlow ops can't deal with any NaNs touching them,
    # even if they would not influence the result. Therefore we use a separate
    # "joint_validity_mask" to indicate which joint coords are valid.
    imcoords = np.nan_to_num(imcoords)
    return example.image_path, np.float32(im), np.float32(imcoords), joint_validity_mask
def load_and_transform3d(ex, joint_info, learning_phase, rng):
    """Load a 3D-annotated example, crop and augment it, and return the training dict.

    Args:
        ex: example with ``bbox``, ``camera``, ``image_path``, ``world_coords``,
            ``univ_coords`` and optionally ``mask``. NOTE: for paths containing
            'sailvos', ``ex.world_coords`` and ``ex.camera.t`` are modified in place.
        joint_info: provides ``mirror_mapping`` for reordering joints after a flip.
        learning_phase: compared against ``TRAIN`` to decide which augmentations apply.
        rng: parent random number generator from which per-purpose generators are derived.

    Returns:
        A dict with keys ``image``, ``intrinsics``, ``image_path``, ``coords3d_true``,
        ``coords2d_true``, ``rot_to_orig_cam``, ``rot_to_world``, ``cam_loc``,
        ``joint_validity_mask`` and ``is_joint_in_fov``.
    """
    # Get the random number generators for the different augmentations to make it reproducible
    appearance_rng = util.new_rng(rng)
    background_rng = util.new_rng(rng)
    geom_rng = util.new_rng(rng)
    partial_visi_rng = util.new_rng(rng)

    output_side = FLAGS.proc_side
    output_imshape = (output_side, output_side)

    # Dataset detection by (case-insensitive) path substring, computed once so that
    # every dataset-specific step below uses the same criterion.
    image_path_lower = ex.image_path.lower()
    is_sailvos = 'sailvos' in image_path_lower
    is_surreal = 'surreal' in image_path_lower

    if is_sailvos:
        # This is needed in order not to lose precision in later operations.
        # Background: In the Sailvos dataset (GTA V), some world coordinates
        # are crazy large (several kilometers, i.e. millions of millimeters, which becomes
        # hard to process with the limited simultaneous dynamic range of float32).
        # They are stored in float64 but the processing is done in float32 here.
        ex.world_coords -= ex.camera.t
        ex.camera.t[:] = 0

    box = ex.bbox
    if is_surreal:
        # Surreal images are flipped wrong in the official dataset release
        box = box.copy()
        box[0] = 320 - (box[0] + box[2])

    # Partial visibility
    if is_surreal and 'surmuco' not in FLAGS.dataset:
        partial_visi_prob = 0.5
    elif 'h36m' in image_path_lower and 'many' in FLAGS.dataset:
        partial_visi_prob = 0.5
    else:
        partial_visi_prob = FLAGS.partial_visibility_prob

    use_partial_visi_aug = (
        (learning_phase == TRAIN or FLAGS.test_aug) and
        partial_visi_rng.rand() < partial_visi_prob)
    if use_partial_visi_aug:
        box = util.random_partial_subbox(boxlib.expand_to_square(box), partial_visi_rng)

    # Geometric transformation and augmentation
    crop_side = np.max(box[2:])
    center_point = boxlib.center(box)
    if ((learning_phase == TRAIN and FLAGS.geom_aug) or
            (learning_phase != TRAIN and FLAGS.test_aug and FLAGS.geom_aug)):
        center_point += util.random_uniform_disc(geom_rng) * FLAGS.shift_aug / 100 * crop_side

    # The homographic reprojection of a rectangle (bounding box) will not be another rectangle
    # Hence, instead we transform the side midpoints of the short sides of the box and
    # determine an appropriate zoom factor by taking the projected distance of these two points
    # and scaling that to the desired output image side length.
    if box[2] < box[3]:
        # Tall box: take midpoints of top and bottom sides
        delta_y = np.array([0, box[3] / 2])
        sidepoints = center_point + np.stack([-delta_y, delta_y])
    else:
        # Wide box: take midpoints of left and right sides
        delta_x = np.array([box[2] / 2, 0])
        sidepoints = center_point + np.stack([-delta_x, delta_x])

    cam = ex.camera.copy()
    cam.turn_towards(target_image_point=center_point)
    cam.undistort()
    cam.square_pixels()
    cam_sidepoints = cameralib.reproject_image_points(sidepoints, ex.camera, cam)
    crop_side = np.linalg.norm(cam_sidepoints[0] - cam_sidepoints[1])
    cam.zoom(output_side / crop_side)
    cam.center_principal_point(output_imshape)

    if FLAGS.geom_aug and (learning_phase == TRAIN or FLAGS.test_aug):
        s1 = FLAGS.scale_aug_down / 100
        s2 = FLAGS.scale_aug_up / 100
        zoom = geom_rng.uniform(1 - s1, 1 + s2)
        cam.zoom(zoom)
        r = np.deg2rad(FLAGS.rot_aug)
        cam.rotate(roll=geom_rng.uniform(-r, r))

    world_coords = ex.univ_coords if FLAGS.universal_skeleton else ex.world_coords
    metric_world_coords = ex.world_coords

    if learning_phase == TRAIN and geom_rng.rand() < 0.5:
        cam.horizontal_flip()
        # Must reorder the joints due to left and right flip
        camcoords = cam.world_to_camera(world_coords)[joint_info.mirror_mapping]
        metric_world_coords = metric_world_coords[joint_info.mirror_mapping]
    else:
        camcoords = cam.world_to_camera(world_coords)

    imcoords = cam.world_to_image(metric_world_coords)

    # Load and reproject image
    image_path = util.ensure_absolute_path(ex.image_path)
    origsize_im = improc.imread_jpeg(image_path)
    if is_surreal:
        # Surreal images are flipped wrong in the official dataset release
        origsize_im = origsize_im[:, ::-1]

    interp_str = (FLAGS.image_interpolation_train
                  if learning_phase == TRAIN else FLAGS.image_interpolation_test)
    antialias = (FLAGS.antialias_train if learning_phase == TRAIN else FLAGS.antialias_test)
    interp = getattr(cv2, 'INTER_' + interp_str.upper())
    im = cameralib.reproject_image(
        origsize_im, ex.camera, cam, output_imshape, antialias_factor=antialias, interp=interp)

    # Color adjustment
    if re.match('.*mupots/TS[1-5]/.+', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
    elif '3dhp' in ex.image_path and re.match('.+/(TS[1-4])/', ex.image_path):
        im = improc.adjust_gamma(im, 0.67, inplace=True)
        im = improc.white_balance(im, 110, 145)
    elif 'panoptic' in image_path_lower:
        im = improc.white_balance(im, 120, 138)

    # Background augmentation
    if hasattr(ex, 'mask') and ex.mask is not None:
        bg_aug_prob = 0.2 if is_sailvos else FLAGS.background_aug_prob
        if (FLAGS.background_aug_prob and (learning_phase == TRAIN or FLAGS.test_aug) and
                background_rng.rand() < bg_aug_prob):
            fgmask = improc.decode_mask(ex.mask)
            if is_surreal:
                # Surreal images are flipped wrong in the official dataset release.
                # The mask must be mirrored under exactly the same condition as the
                # image above, otherwise the two no longer line up.
                fgmask = fgmask[:, ::-1]
            fgmask = cameralib.reproject_image(
                fgmask, ex.camera, cam, output_imshape, antialias_factor=antialias,
                interp=interp)
            im = augmentation.background.augment_background(im, fgmask, background_rng)

    # Occlusion and color augmentation
    im = augmentation.appearance.augment_appearance(
        im, learning_phase, FLAGS.occlude_aug_prob, appearance_rng)
    im = tfu.nhwc_to_std(im)
    im = improc.normalize01(im)

    # A joint is in the field of view if no image coordinate is negative or >= proc_side
    is_joint_in_fov = ~np.logical_or(
        np.any(imcoords < 0, axis=-1), np.any(imcoords >= FLAGS.proc_side, axis=-1))
    # Joints with NaN coordinates are invalid
    joint_validity_mask = ~np.any(np.isnan(camcoords), axis=-1)

    rot_to_orig_cam = ex.camera.R @ cam.R.T
    rot_to_world = cam.R.T

    return dict(
        image=im,
        intrinsics=np.float32(cam.intrinsic_matrix),
        image_path=ex.image_path,
        coords3d_true=np.nan_to_num(camcoords).astype(np.float32),
        coords2d_true=np.nan_to_num(imcoords).astype(np.float32),
        rot_to_orig_cam=rot_to_orig_cam.astype(np.float32),
        rot_to_world=rot_to_world.astype(np.float32),
        cam_loc=cam.t.astype(np.float32),
        joint_validity_mask=joint_validity_mask,
        is_joint_in_fov=np.float32(is_joint_in_fov))
def make_efficient_example(ex, new_image_path, further_expansion_factor=1,
                           image_adjustments_3dhp=False, min_time=None):
    """Make example by storing the image in a cropped and resized version for efficient loading

    Works for both 3D examples (those having a ``world_coords`` attribute, which use
    ``ex.camera``) and 2D examples (which use a synthetic 2D camera).

    Args:
        ex: source example. ``ex.image_path`` may be a path string or an already loaded
            image array. ``ex.mask``, if present, may be a path string or any other
            value (which is then passed through unchanged).
        new_image_path: where to store the new image; also stored in the result.
        further_expansion_factor: passed to ``get_expanded_crop_box``; if not positive,
            the reprojected box itself is used as the crop.
        image_adjustments_3dhp: if true, apply extra gamma and white balance adjustment.
        min_time: passed to ``util.is_file_newer`` to decide whether an existing file
            can be reused.

    Returns:
        A deep copy of `ex` with updated ``bbox``, ``image_path``, ``camera`` (3D) or
        ``coords`` (2D), and ``mask`` where applicable.
    """
    is3d = hasattr(ex, 'world_coords')
    has_path = isinstance(ex.image_path, str)
    if has_path:
        w, h = improc.image_extents(util.ensure_absolute_path(ex.image_path))
    else:
        w, h = ex.image_path.shape[1], ex.image_path.shape[0]
    full_box = boxlib.full_box(imsize=[w, h])

    if is3d:
        old_camera = ex.camera
        new_camera = ex.camera.copy()
        new_camera.turn_towards(target_image_point=boxlib.center(ex.bbox))
        new_camera.undistort()
    else:
        old_camera = cameralib.Camera.create2D()
        new_camera = old_camera.copy()

    reprojected_box = reproject_box(ex.bbox, old_camera, new_camera, method='side_midpoints')
    reprojected_full_box = reproject_box(full_box, old_camera, new_camera, method='corners')
    if further_expansion_factor > 0:
        expanded_bbox = get_expanded_crop_box(
            reprojected_box, reprojected_full_box, further_expansion_factor)
    else:
        expanded_bbox = reprojected_box

    scale_factor = min(1.2, 256 / np.max(reprojected_box[2:]) * 1.5)
    new_camera.shift_image(-expanded_bbox[:2])
    new_camera.scale_output(scale_factor)

    # Recompute the box now that the new camera has been shifted and scaled
    reprojected_box = reproject_box(ex.bbox, old_camera, new_camera, method='side_midpoints')
    dst_shape = improc.rounded_int_tuple(scale_factor * expanded_bbox[[3, 2]])

    new_image_abspath = util.ensure_absolute_path(new_image_path)
    if not (util.is_file_newer(new_image_abspath, min_time)
            and improc.is_image_readable(new_image_abspath)):
        im = improc.imread_jpeg(ex.image_path) if has_path else ex.image_path
        # Resample with gamma 2.2 removed; the inverse gamma is applied when converting back
        im = np.power((im.astype(np.float32) / 255), 2.2)
        new_im = cameralib.reproject_image(
            im, old_camera, new_camera, dst_shape, antialias_factor=2,
            interp=cv2.INTER_CUBIC)
        new_im = np.clip(new_im, 0, 1)

        if image_adjustments_3dhp:
            # enhance the 3dhp images to reduce the green tint and increase brightness
            new_im = (new_im ** (1 / 2.2 * 0.67) * 255).astype(np.uint8)
            new_im = improc.white_balance(new_im, 110, 145)
        else:
            new_im = (new_im ** (1 / 2.2) * 255).astype(np.uint8)
        util.ensure_path_exists(new_image_abspath)
        imageio.imwrite(new_image_abspath, new_im, quality=95)
        assert improc.is_image_readable(new_image_abspath)

    new_ex = copy.deepcopy(ex)
    new_ex.bbox = reprojected_box
    new_ex.image_path = new_image_path
    if is3d:
        new_ex.camera = new_camera
    else:
        new_ex.coords = cameralib.reproject_image_points(
            new_ex.coords, old_camera, new_camera)

    if hasattr(ex, 'mask') and ex.mask is not None:
        if isinstance(ex.mask, str):
            mask = improc.imread_jpeg(util.ensure_absolute_path(ex.mask))
            host_mask, cuda_mask = get_memory(mask.shape)
            np.divide(mask.astype(np.float32), 255, out=host_mask)
            cuda_mask.upload(host_mask)
            # Reproject from old_camera (not ex.camera): it is the same object for 3D
            # examples, and for 2D examples it is the synthetic camera used for the image.
            mask_reproj = cameralib.reproject_image(
                cuda_mask, old_camera, new_camera, dst_shape, antialias_factor=2).download()
            # Binarize the first channel back to a 0/255 mask
            mask_reproj = 255 * (mask_reproj[..., 0] > 32 / 255).astype(np.uint8)
            new_ex.mask = get_connected_component_with_highest_iou(
                mask_reproj, reprojected_box)
        else:
            new_ex.mask = ex.mask
    return new_ex
def make_efficient_example(ex):
    """Store a cropped, downscaled copy of the example's image and mask; return a new example.

    The crop is larger than the person box so that later rotation, shift and scale
    augmentations still find image content around the person.

    Args:
        ex: source example; its ``image_path`` (relative to ``paths.DATA_ROOT``),
            ``bbox``, ``camera``, ``world_coords`` and ``univ_coords`` are read.

    Returns:
        A ``p3ds.Pose3DExample`` referring to the new image (by path relative to
        ``paths.DATA_ROOT``), with adjusted box and camera and the mask returned by
        ``get_mask_with_highest_iou`` (or loaded from its pickle cache).
    """
    image_relpath = ex.image_path

    # Margins reserved for the augmentations applied later
    max_rotate = np.pi / 6
    padding_factor = 1 / 0.85
    scale_up_factor = 1 / 0.85
    scale_down_factor = 1 / 0.85
    shift_factor = 1.2
    base_dst_side = 256

    # Square side that covers the box under rotations of up to max_rotate
    box_center = boxlib.center(ex.bbox)
    s, c = np.sin(max_rotate), np.cos(max_rotate)
    w, h = ex.bbox[2:]
    rot_bbox_side = max(c * w + s * h, c * h + s * w)
    rot_bbox = boxlib.box_around(box_center, rot_bbox_side)

    # Capped at 1 so that the image is never enlarged
    scale_factor = min(base_dst_side / np.max(ex.bbox[2:]) * scale_up_factor, 1)
    expansion_factor = padding_factor * shift_factor * scale_down_factor
    expanded_bbox = boxlib.expand(rot_bbox, expansion_factor)
    expanded_bbox = boxlib.intersect(expanded_bbox, np.array([0, 0, 2048, 2048]))

    # Camera of the cropped and rescaled image
    new_camera = ex.camera.copy()
    new_camera.intrinsic_matrix[:2, 2] -= expanded_bbox[:2]
    new_camera.scale_output(scale_factor)
    new_camera.undistort()
    dst_shape = improc.rounded_int_tuple(scale_factor * expanded_bbox[[3, 2]])

    new_im_relpath = ex.image_path.replace('3dhp', '3dhp_downscaled')
    new_im_path = os.path.join(paths.DATA_ROOT, new_im_relpath)
    if not (util.is_file_newer(new_im_path, "2019-11-14T23:32:07")
            and improc.is_image_readable(new_im_path)):
        im = improc.imread_jpeg(f'{paths.DATA_ROOT}/{image_relpath}')
        new_im = cameralib.reproject_image(im, ex.camera, new_camera, dst_shape)
        util.ensure_path_exists(new_im_path)
        imageio.imwrite(new_im_path, new_im)

    new_bbox_topleft = cameralib.reproject_image_points(ex.bbox[:2], ex.camera, new_camera)
    new_bbox = np.concatenate([new_bbox_topleft, ex.bbox[2:] * scale_factor])

    # Derive the mask cache path from the *relative* image path and join it with
    # DATA_ROOT exactly once. Deriving it from the already joined path and joining
    # again only yields the right location when DATA_ROOT is absolute.
    mask_rle_relpath = new_im_relpath.replace('Images', 'FGmaskImages').replace(
        '.jpg', '.pkl')
    mask_rle_path = os.path.join(paths.DATA_ROOT, mask_rle_relpath)
    if util.is_file_newer(mask_rle_path, "2020-03-11T20:46:46"):
        # NOTE(review): pickle is only safe here as long as the cache directory is trusted
        mask_runlength = util.load_pickle(mask_rle_path)
    else:
        mask_relpath = ex.image_path.replace('Images', 'FGmaskImages').replace(
            '.jpg', '.png')
        mask = imageio.imread(os.path.join(paths.DATA_ROOT, mask_relpath))
        mask_reproj = cameralib.reproject_image(mask, ex.camera, new_camera, dst_shape)
        mask_runlength = get_mask_with_highest_iou(mask_reproj, new_bbox)
        util.dump_pickle(mask_runlength, mask_rle_path)

    return p3ds.Pose3DExample(
        new_im_relpath, ex.world_coords, new_bbox, new_camera,
        mask=mask_runlength, univ_coords=ex.univ_coords)