def convert_to_open3d_pointcloud(dmap: Depthmap, floor_altitude_in_meters: float):
    """Converts depthmap into Open3D pointcloud.

    floor_altitude_in_meters is the floor altitude used to align the floor to Y=0.
    """
    points = []
    normals = []
    points_3d_arr = dmap.convert_2d_to_3d_oriented()
    normal_3d_arr = dmap.calculate_normalmap_array(points_3d_arr)
    for x in range(2, dmap.width - 2):
        for y in range(2, dmap.height - 2):
            depth = dmap.depthmap_arr[x, y]
            if not depth:
                continue
            x_coord = points_3d_arr[0, x, y]
            y_coord = points_3d_arr[1, x, y] - floor_altitude_in_meters
            z_coord = points_3d_arr[2, x, y]
            x_normal = normal_3d_arr[0, x, y]
            y_normal = normal_3d_arr[1, x, y]
            z_normal = normal_3d_arr[2, x, y]
            points.append([x_coord, y_coord, z_coord])
            normals.append([x_normal, y_normal, z_normal])
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.normals = o3d.utility.Vector3dVector(normals)
    return pcd
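
# A minimal usage sketch (not part of the original source): build the point
# cloud and open it in Open3D's interactive viewer. The file paths are
# placeholders, and using get_floor_level() as the floor altitude is an
# assumption for illustration.
import open3d as o3d

dmap = Depthmap.create_from_zip_absolute('example.depth', 'example.jpg', 'calibration.txt')
pcd = convert_to_open3d_pointcloud(dmap, dmap.get_floor_level())
o3d.visualization.draw_geometries([pcd])  # opens an interactive 3D viewer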
def predict_height(depthmap_file: str, rgb_file: str, calibration_file: str) -> float:
    # Check if it is captured by a new device
    dmap = Depthmap.create_from_zip_absolute(depthmap_file, 0, calibration_file)
    angle = dmap.get_angle_between_camera_and_floor()

    # Run segmentation
    im = Image.open(rgb_file).rotate(-90, expand=True)
    resized_im, seg_map = DEEPLAB_MODEL.run(im)
    seg_map[seg_map != PERSON_SEGMENTATION] = 0

    # Check if the child's head is fully visible
    boundary = calculate_boundary(seg_map)
    if boundary[0] <= 0:
        raise Exception("Skipping because the child's head is not fully visible")

    # Upscale depthmap
    floor = dmap.get_floor_level()
    mask = dmap.detect_floor(floor)
    depth = dmap.get_distance_of_child_from_camera(mask)
    dmap.resize(seg_map.shape[0], seg_map.shape[1])
    dmap.depthmap_arr[:, :] = depth

    # Calculate height
    seg_map[seg_map == PERSON_SEGMENTATION] = MASK_CHILD
    highest = dmap.get_highest_point(seg_map)[1]
    factor = 1.0 + math.sin(math.radians(angle)) * HEIGHT_SCALE_FACTOR
    height_in_cm = factor * (highest - floor) * 100.0 + HEIGHT_OFFSET_IN_CM
    return height_in_cm
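
# A hedged usage sketch: the artifact paths below are placeholders.
# predict_height raises when the child's head is cut off, so callers should
# be prepared to handle exceptions.
try:
    height = predict_height('scan.depth', 'scan.jpg', 'camera_calibration.txt')
    print(f'Predicted height: {height:.1f} cm')
except Exception as exc:
    print(f'Prediction rejected: {exc}')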
def create_layers_rgbd(depthmap_fpath: str, rgb_fpath: str, should_rotate_rgb: bool) -> Tuple[np.ndarray, dict]:
    if should_rotate_rgb:
        dmap = rotate_and_load_depthmap_with_rgbd(depthmap_fpath, rgb_fpath, CALIBRATION_FPATH)
    else:
        dmap = Depthmap.create_from_zip_absolute(depthmap_fpath, rgb_fpath, CALIBRATION_FPATH)
    if not dmap.device_pose:
        raise InvalidDevicePoseError()

    depthmap = dmap.depthmap_arr  # shape: (longer, shorter)
    depthmap = preprocess(depthmap)  # shape: (longer, shorter, 1)

    rgb = dmap.rgb_array  # shape: (longer, shorter, 3)
    rgb = preprocess_rgb(rgb)  # shape: (longer, shorter, 3)

    layers = np.concatenate(
        [
            depthmap,  # shape: (longer, shorter, 1)
            rgb,  # shape: (longer, shorter, 3)
        ],
        axis=2)  # shape: (longer, shorter, 4)

    metadata = {
        'device_pose': dmap.device_pose,
        'raw_header': dmap.header,
        'angle': dmap.get_angle_between_camera_and_floor(),
    }
    return layers, metadata
def render_normal(dmap: Depthmap) -> np.ndarray:
    """Render normal vectors.

    How normal vectors are visualized:
    When a vector has (x, y, z) = (1, 0, 0), it is shown in red.
    When a vector has (x, y, z) = (0, 1, 0), it is shown in green (e.g. the floor).
    When a vector has (x, y, z) = (0, 0, 1), it is shown in blue.
    """
    points_3d_arr = dmap.convert_2d_to_3d_oriented(should_smooth=True)
    normal = dmap.calculate_normalmap_array(points_3d_arr)

    # We can't render negative values, so we take the absolute value
    normal = abs(normal)  # shape: (3, width, height)
    return np.moveaxis(normal, 0, -1)
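
# A small sketch (assumed file paths): display the rendered normal map with
# matplotlib; render_normal itself does not depend on matplotlib.
import matplotlib.pyplot as plt

dmap = Depthmap.create_from_zip_absolute('example.depth', 'example.jpg', 'calibration.txt')
plt.imshow(render_normal(dmap))
plt.title('Normal map: |x| -> red, |y| -> green, |z| -> blue')
plt.show()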
def test_depthmap():
    dmap = Depthmap.create_from_zip_absolute(DEPTHMAP_FPATH, RGB_FPATH, CALIBRATION_FILE)
    assert dmap.width == 240
    assert dmap.height == 180

    dmap_intrinsics = np.array([dmap.fx, dmap.fy, dmap.cx, dmap.cy])
    expected_intrinsics = np.array(
        [162.883128, 162.881251, 119.004372, 90.630756])
    np.testing.assert_array_almost_equal(dmap_intrinsics, expected_intrinsics)
    assert dmap.max_confidence == 7.
    assert dmap.depth_scale == 0.001

    floor = dmap.get_floor_level()
    mask = dmap.segment_child(floor)
    highest_point = dmap.get_highest_point(mask)
    child_height_in_m = highest_point[1] - floor
    assert 0 < child_height_in_m < 1.2
    assert mask.shape[:2] == dmap.rgb_array.shape[:2]

    angle_in_degrees = dmap.get_angle_between_camera_and_floor()
    assert -90 < angle_in_degrees < 90

    distance_in_m = dmap.get_distance_of_child_from_camera(mask)
    assert 0.1 < distance_in_m < 5.

    dmap.resize(640, 360)
    dmap_intrinsics = np.array([dmap.fx, dmap.fy, dmap.cx, dmap.cy])
    expected_intrinsics = np.array(
        [434.355008, 325.762502, 317.344992, 181.261512])
    np.testing.assert_array_almost_equal(dmap_intrinsics, expected_intrinsics)
def test_blur_face():
    dmap = Depthmap.create_from_zip_absolute(DEPTHMAP_FPATH, RGB_FPATH, CALIBRATION_FPATH)

    # Find the top of the object
    floor = dmap.get_floor_level()
    assert floor == pytest.approx(-0.9706086, 0.001)
    mask = dmap.segment_child(floor)
    highest_point = dmap.get_highest_point(mask)  # 3D

    # Render the color data
    output_unblurred = render_rgb(dmap)

    # Blur
    output_blurred = blur_face(output_unblurred, highest_point, dmap, CHILD_HEAD_HEIGHT_IN_METERS)

    # Assert that some pixels in the image changed (the images are not identical)
    all_count = dmap.width * dmap.height
    count = np.count_nonzero(output_unblurred - output_blurred) / 3
    ratio_blurred = count / all_count
    assert 0.01 < ratio_blurred < 0.9

    # Assert that the area around the object is blurred
    object_x = int(dmap.width * OFFSET_X_Y[0])
    object_y = int(dmap.height * (1.0 - OFFSET_X_Y[1]))
    slice_x = slice(object_x - 2, object_x + 2)
    slice_y = slice(object_y - 2, object_y + 2)
    assert (output_unblurred[slice_x, slice_y] != output_blurred[slice_x, slice_y]).any()

    # Assert that the corner is NOT blurred
    corner_x = 0
    corner_y = 0
    slice_x = slice(corner_x, corner_x + 4)
    slice_y = slice(corner_y, corner_y + 4)
    np.testing.assert_array_equal(output_unblurred[slice_x, slice_y],
                                  output_blurred[slice_x, slice_y])
def render_segmentation(floor: float,
                        mask: np.ndarray,
                        dmap: Depthmap) -> np.ndarray:
    # Segmentation
    red = [1, 0, 0]
    blue = [0, 0, 1]
    yellow = [1, 1, 0]
    output = np.zeros((dmap.width, dmap.height, 3))
    output[mask == MASK_CHILD] = yellow
    output[mask == MASK_FLOOR] = blue
    output[mask < 0] = red

    # Pattern mapping
    points_3d_arr = dmap.convert_2d_to_3d_oriented(should_smooth=True)
    elevation = points_3d_arr[1, :, :] - floor
    horizontal = (elevation % PATTERN_LENGTH_IN_METERS) / PATTERN_LENGTH_IN_METERS
    vertical_x = (points_3d_arr[0, :, :] % PATTERN_LENGTH_IN_METERS) / PATTERN_LENGTH_IN_METERS
    vertical_z = (points_3d_arr[2, :, :] % PATTERN_LENGTH_IN_METERS) / PATTERN_LENGTH_IN_METERS
    vertical = (vertical_x + vertical_z) / 2.0
    output[:, :, 0] *= horizontal
    output[:, :, 1] *= horizontal
    output[:, :, 2] *= vertical

    # Fog effect
    fog = dmap.depthmap_arr * dmap.depthmap_arr
    fog[fog == 0] = 1
    output[:, :, 0] /= fog
    output[:, :, 1] /= fog
    output[:, :, 2] /= fog

    # Ensure pixel clipping
    np.clip(output, 0., 1., output)

    # Show the boundary of the child
    color = [1, 0, 1]  # purple
    if dmap.is_child_fully_visible(mask):
        color = [0, 1, 0]  # green
    aabb = calculate_boundary(mask == MASK_CHILD)
    draw_boundary(output, aabb, color)

    return output
def rotate_and_load_depthmap_with_rgbd(depthmap_fpath: str,
                                       rgb_fpath: str,
                                       calibration_fpath: str) -> Depthmap:
    width, height, data, depth_scale, max_confidence, device_pose, header_line = (
        Depthmap.read_depthmap_data(depthmap_fpath))
    with tempfile.NamedTemporaryFile() as rgb_temp_file:
        pil_im = Image.open(rgb_fpath)
        pil_im = pil_im.rotate(90, expand=True)
        pil_im.save(rgb_temp_file.name, 'png')
        rgb_array = Depthmap.read_rgb_data(rgb_temp_file.name, width, height)
    intrinsics = parse_calibration(calibration_fpath)
    depthmap_arr = None
    rgb_fpath = None
    dmap = Depthmap(intrinsics, width, height, data, depthmap_arr, depth_scale,
                    max_confidence, device_pose, rgb_fpath, rgb_array, header_line)
    return dmap
def render_plot(dmap: Depthmap) -> np.ndarray:
    # Detect floor and child
    floor: float = dmap.get_floor_level()
    mask = dmap.segment_child(floor)  # dmap.detect_floor(floor)

    # Prepare plots
    output_plots = [
        render_depth(dmap),
        render_normal(dmap),
        render_segmentation(floor, mask, dmap),
        render_confidence(dmap),
    ]

    if dmap.has_rgb:
        highest_point: np.ndarray = dmap.get_highest_point(mask)
        output_rgb = render_rgb(dmap)
        output_rgb = blur_face(output_rgb, highest_point, dmap, CHILD_HEAD_HEIGHT_IN_METERS)
        output_plots.append(output_rgb)

    return np.concatenate(output_plots, axis=1)
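
# One possible way to persist the combined plot instead of showing it
# interactively; the output path is an assumption, as is the expectation
# that render_plot returns a float RGB array in [0, 1] (which plt.imsave
# accepts directly).
import matplotlib.pyplot as plt

dmap = Depthmap.create_from_zip_absolute('example.depth', 'example.jpg', 'calibration.txt')
plt.imsave('debug_plots.png', render_plot(dmap))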
def test_get_highest_point():
    dmap = Depthmap.create_from_zip_absolute(DEPTHMAP_FPATH, RGB_FPATH, CALIBRATION_FILE)

    # Find the top of the object
    floor = dmap.get_floor_level()
    mask = dmap.segment_child(floor)
    highest_point = dmap.get_highest_point(mask)  # 3D

    object_height_in_m = highest_point[1] - floor
    assert 0.3 < object_height_in_m < 0.6
def __init__(self, dmap: Depthmap, rgb_fpath: str):
    """Create object from depthmap and RGB file path."""
    self.floor = dmap.get_floor_level()
    self.rgb = cv2.imread(str(rgb_fpath))
    dim = (640, int(self.rgb.shape[0] / self.rgb.shape[1] * 640.0))
    self.rgb = cv2.resize(self.rgb, dim, interpolation=cv2.INTER_AREA)
    dmap.resize(self.rgb.shape[1], self.rgb.shape[0])
    self.dmap = dmap
    self.rgb_fpath = rgb_fpath

    # Use cached HRNet results if available, otherwise run the model and cache the output
    cache_fpath = f'{rgb_fpath}-hrnet.json'
    try:
        with open(cache_fpath) as json_file:
            self.persons_coordinates = json.load(json_file)
    except OSError:
        self.persons_coordinates = HRNET_MODEL.result_on_artifact_level_from_image(
            self.rgb, rgb_fpath, '0')
        with open(cache_fpath, 'w') as json_file:
            json.dump(self.persons_coordinates, json_file, cls=NumpyEncoder)
def create_layers(depthmap_fpath: str) -> Tuple[np.ndarray, dict]:
    dmap = Depthmap.create_from_zip_absolute(
        depthmap_fpath, rgb_fpath=None, calibration_fpath=CALIBRATION_FPATH)
    if not dmap.device_pose:
        raise InvalidDevicePoseError()

    depthmap = dmap.depthmap_arr  # shape: (width, height)
    depthmap = preprocess(depthmap)
    layers = depthmap

    metadata = {
        'device_pose': dmap.device_pose,
        'raw_header': dmap.header,
        'angle': dmap.get_angle_between_camera_and_floor(),
    }
    return layers, metadata
def predict_height(depthmap_file: str, rgb_file: str, calibration_file: str) -> float:
    # Check if the child is fully visible
    dmap = Depthmap.create_from_zip_absolute(depthmap_file, 0, calibration_file)
    floor = dmap.get_floor_level()
    mask = dmap.segment_child(floor)
    if not dmap.is_child_fully_visible(mask):
        raise Exception('Skipping because the child is not fully visible')

    # Calculate height
    highest_point = dmap.get_highest_point(mask)
    height_in_cm = (highest_point[1] - floor) * 100.0
    return height_in_cm
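
# Hedged usage note: this variant shares its signature with the
# segmentation-based predict_height above, so the two can be swapped behind a
# single call site. The paths and the plausibility bounds are assumptions.
height_cm = predict_height('scan.depth', 'scan.jpg', 'camera_calibration.txt')
assert 45.0 < height_cm < 130.0  # sanity check with assumed bounds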
def export_obj(fpath: Union[str, Path],
               dmap: Depthmap,
               floor_altitude_in_meters: float,
               triangulate: bool):
    """Export an .obj file, which can be visualized in tools like MeshLab.

    floor_altitude_in_meters is the floor altitude used to align the floor to Y=0.
    triangulate=True generates an OBJ mesh;
    triangulate=False generates an OBJ pointcloud.
    """
    fpath = Path(fpath)
    count = 0
    indices = np.zeros((dmap.width, dmap.height))

    # Create MTL file (a standard extension of OBJ files to define geometry materials and textures)
    material_fpath = fpath.with_suffix('.mtl')
    if dmap.has_rgb:
        with open(material_fpath, 'w') as f:
            f.write('newmtl default\n')
            f.write(f'map_Kd {str(dmap.rgb_fpath.absolute())}\n')

    with open(fpath, 'w') as f:
        if dmap.has_rgb:
            f.write(f'mtllib {material_fpath.name}\n')
            f.write('usemtl default\n')
        points_3d_arr = dmap.convert_2d_to_3d_oriented()
        for x in range(2, dmap.width - 2):
            for y in range(2, dmap.height - 2):
                depth = dmap.depthmap_arr[x, y]
                if not depth:
                    continue
                count = count + 1
                indices[x, y] = count  # add index of written vertex into array
                x_coord = points_3d_arr[0, x, y]
                y_coord = points_3d_arr[1, x, y] - floor_altitude_in_meters
                z_coord = points_3d_arr[2, x, y]
                f.write(f'v {x_coord} {y_coord} {z_coord}\n')
                f.write(f'vt {x / dmap.width} {y / dmap.height}\n')
        if triangulate:
            _do_triangulation(dmap, indices, f)

    logger.info('Mesh exported into %s', fpath)
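
# Usage sketch with assumed paths: align the floor to Y=0 via the detected
# floor level and export both output variants.
dmap = Depthmap.create_from_zip_absolute('example.depth', 'example.jpg', 'calibration.txt')
floor = dmap.get_floor_level()
export_obj('output_mesh.obj', dmap, floor, triangulate=True)     # mesh
export_obj('output_points.obj', dmap, floor, triangulate=False)  # pointcloud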
def test_is_child_fully_visible():
    depthmap_dir = str(TOOLKIT_DIR / 'huawei_p40pro')
    depthmap_fname = 'depth_dog_1622182020448_100_282.depth'
    calibration_file = str(TOOLKIT_DIR / 'huawei_p40pro' / 'camera_calibration.txt')
    dmap = Depthmap.create_from_zip(depthmap_dir, depthmap_fname, 0, calibration_file)

    # Run the standard visibility check
    floor = dmap.get_floor_level()
    mask = dmap.segment_child(floor)
    assert dmap.is_child_fully_visible(mask)

    # Run the visibility check when the child covers most of the camera view
    margin = 5
    x1 = margin
    x2 = dmap.width - margin
    y1 = margin
    y2 = dmap.height - margin
    mask[x1:x2, y1:y2] = MASK_CHILD
    assert not dmap.is_child_fully_visible(mask)
def export_renderable_obj(fpath: Union[str, Path],
                          dmap: Depthmap,
                          floor_altitude_in_meters: float,
                          point_size_in_meters: float):
    """Export a pointcloud as an .obj file, which can be rendered in tools like Blender.

    floor_altitude_in_meters is the floor altitude used to align the floor to Y=0.
    point_size_in_meters is the point size in meters.
    """
    fpath = Path(fpath)
    count = 1

    # Create MTL file (a standard extension of OBJ files to define geometry materials and textures)
    material_fpath = fpath.with_suffix('.mtl')
    if dmap.has_rgb:
        with open(material_fpath, 'w') as f:
            f.write('newmtl default\n')
            f.write(f'map_Kd {str(dmap.rgb_fpath.absolute())}\n')

    with open(fpath, 'w') as f:
        if dmap.has_rgb:
            f.write(f'mtllib {material_fpath.name}\n')
            f.write('usemtl default\n')
        points_3d_arr = dmap.convert_2d_to_3d_oriented()
        for x in range(2, dmap.width - 2):
            for y in range(2, dmap.height - 2):
                depth = dmap.depthmap_arr[x, y]
                if not depth:
                    continue
                x_coord = points_3d_arr[0, x, y]
                y_coord = points_3d_arr[1, x, y] - floor_altitude_in_meters
                z_coord = points_3d_arr[2, x, y]
                _write_obj_cube(f, dmap, x, y, count, x_coord, y_coord, z_coord,
                                point_size_in_meters)
                count = count + 8

    logger.info('Mesh exported into %s', fpath)
def blur_face(data: np.ndarray,
              highest_point: np.ndarray,
              dmap: Depthmap,
              radius: float) -> np.ndarray:
    """Faceblur of the detected standing child.

    It uses the highest point of the child and blurs all pixels closer than radius.

    Args:
        data: existing canvas to blur
        highest_point: 3D point. The surroundings of this point will be blurred.
        dmap: depthmap
        radius: radius around highest_point to blur

    Returns:
        Canvas like data with the face blurred.
    """
    output = np.copy(data)
    points_3d_arr = dmap.convert_2d_to_3d_oriented()

    # Blur RGB data around the face
    for x in range(dmap.width):
        for y in range(dmap.height):
            depth = dmap.depthmap_arr[x, y]
            if not depth:
                continue

            # Manhattan distance from the highest child point
            point = points_3d_arr[:, x, y]
            vector = point - highest_point
            distance = abs(vector[0]) + abs(vector[1]) + abs(vector[2])
            if distance >= radius:
                continue

            # Gaussian blur
            output[x, y] = get_smoothed_pixel(data, x, y, 10)

    return output
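
# A sketch of the full anonymization flow, assuming placeholder paths and the
# CHILD_HEAD_HEIGHT_IN_METERS radius constant used elsewhere in this codebase:
dmap = Depthmap.create_from_zip_absolute('example.depth', 'example.jpg', 'calibration.txt')
floor = dmap.get_floor_level()
mask = dmap.segment_child(floor)
highest_point = dmap.get_highest_point(mask)
blurred = blur_face(render_rgb(dmap), highest_point, dmap, CHILD_HEAD_HEIGHT_IN_METERS)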
def show(depthmap_dir: str, calibration_file: str, original_orientation=False):
    global DMAP
    fig.canvas.manager.set_window_title(depth_filenames[IDX_CUR_DMAP])
    rgb_filename = rgb_filenames[IDX_CUR_DMAP] if rgb_filenames else 0
    DMAP = Depthmap.create_from_zip(depthmap_dir, depth_filenames[IDX_CUR_DMAP],
                                    rgb_filename, calibration_file)

    angle = DMAP.get_angle_between_camera_and_floor()
    logging.info('angle between camera and floor is %f', angle)

    output = render_plot(DMAP)
    if original_orientation:
        output = ndimage.rotate(output, 90)
    plt.imshow(output)

    plot_names = ['depth', 'normals', 'child/background segmentation', 'confidence']
    if DMAP.has_rgb:
        plot_names.append('rgb')
    plot_title = '\n'.join(
        [f'{i}: {plot_name}' for i, plot_name in enumerate(plot_names)])
    plt.title(plot_title)
    plt.show()
def render_prediction_plots(depthmap_file: str,
                            rgb_file: str,
                            calibration_file: str) -> np.ndarray:
    dmap = Depthmap.create_from_zip_absolute(depthmap_file, rgb_file, calibration_file)
    return render_plot_debug(dmap)
def get_joints3d():
    dmap = Depthmap.create_from_zip_absolute(DEPTHMAP_FPATH, 0, CALIBRATION_FILE)
    floor = dmap.get_floor_level()
    dmap.resize(640, 480)
    return convert_2dskeleton_to_3d(dmap, floor, JOINTS, CONFIDENCES)
def run_evaluation(path: str, metadata_file: str, calibration_file: str,
                   method: str, one_artifact_per_scan: bool):
    """Run the evaluation process and save the results into CSV files.

    Args:
        path: Path where the RAW dataset is located
        metadata_file: Path to the CSV file with RAW dataset metadata preprocessed by the rgbd_match.py script
        calibration_file: Path to the lens calibration file of the device
        method: Method for estimation; available are depthmap_toolkit, ml_segmentation and the hrnet variants.
                The hrnet variants are: hrnet_cv_lying, hrnet_cv_standing, hrnet_ml_lying, hrnet_ml_standing
        one_artifact_per_scan: True to evaluate one artifact per scan (faster), False to evaluate all artifacts (slower)
    """
    is_standing = True
    if method == 'depthmap_toolkit':
        from height_prediction_depthmap_toolkit import predict_height
    elif method == 'ml_segmentation':
        from height_prediction_with_ml_segmentation import predict_height
    elif method == 'hrnet_cv_standing':
        from height_prediction_with_hrnet import predict_height_cv_standing as predict_height
    elif method == 'hrnet_cv_lying':
        from height_prediction_with_hrnet import predict_height_cv_lying as predict_height
        is_standing = False
    elif method == 'hrnet_ml_standing':
        from height_prediction_with_hrnet import predict_height_ml_standing as predict_height
    elif method == 'hrnet_ml_lying':
        from height_prediction_with_hrnet import predict_height_ml_lying as predict_height
        is_standing = False
    else:
        raise Exception('Unimplemented method')

    metadata = filter_metadata(read_csv(metadata_file), is_standing, one_artifact_per_scan)

    output = [header]
    rejections = []
    keys = metadata.keys()
    for key_index, key in enumerate(keys):
        logger.info('Processing %d/%d', key_index + 1, len(keys))

        angles = []
        heights = []
        distances = []
        positions = []
        directions = []
        camera_heights = []
        floors = []
        last_fail = 0
        for artifact in range(len(metadata[key])):
            data = metadata[key][artifact]
            try:
                # Process prediction
                depthmap_file = (path + data[METADATA_DEPTHMAP]).replace('"', '')
                rgb_file = (path + data[METADATA_RGB]).replace('"', '')
                height = predict_height(depthmap_file, rgb_file, calibration_file)
                check_height_prediction(height, is_standing)

                # Get additional data
                dmap = Depthmap.create_from_zip_absolute(depthmap_file, 0, calibration_file)
                if is_google_tango_resolution(dmap.width, dmap.height):
                    raise Exception('Skipping because the data is not from a new device')
                floor = dmap.get_floor_level()
                mask = dmap.detect_floor(floor)
                distance = dmap.get_distance_of_child_from_camera(mask)
                angle = dmap.get_angle_between_camera_and_floor()
                position = dmap.device_pose[12:15]
                direction = dmap.get_camera_direction_angle()
                camera_height = -position[1] - floor

                floors.append(floor)
                camera_heights.append(camera_height)
                directions.append(direction)
                positions.append(position)
                distances.append(distance)
                heights.append(height)
                angles.append(angle)
            except Exception as exc:
                last_fail = str(exc)
                continue

        info = update_output(angles, distances, heights, positions, directions,
                             camera_heights, floors, last_fail, data, output,
                             rejections, is_standing)
        log_report(generate_report(output, info, is_standing))

    write_csv('output.csv', output)
    write_csv('rejections.csv', rejections)
    write_csv('report.csv', generate_report(output, info, is_standing))
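
# A hypothetical invocation; the dataset layout and file names are
# placeholders, not paths from the original project.
run_evaluation(path='/data/raw_dataset/',
               metadata_file='metadata.csv',
               calibration_file='camera_calibration.txt',
               method='depthmap_toolkit',
               one_artifact_per_scan=True)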
def test_parse_header_valid_device_pose(caplog):
    caplog.set_level(logging.INFO)
    header = '240x180_0.001_7_0.32703257_-0.6232807_-0.6007507_0.3790359_-0.0071239285_-0.0012060514_0.0050547933'
    Depthmap.parse_header(header)
    assert 'device_pose looks wrong' not in caplog.text
keys = metadata.keys()
for key_index, key in enumerate(keys):
    valid = []
    for artifact in range(len(metadata[key])):
        data = metadata[key][artifact]
        try:
            # Run segmentation
            rgb_file = (path + data[METADATA_RGB]).replace('"', '')
            im = Image.open(rgb_file).rotate(-90, expand=True)
            resized_im, seg_map = DEEPLAB_MODEL.run(im)
            seg_map[seg_map != PERSON_SEGMENTATION] = 0

            # Get upscaled depthmap
            depthmap_file = (path + data[METADATA_DEPTHMAP]).replace('"', '')
            dmap = Depthmap.create_from_zip_absolute(depthmap_file, 0, calibration_file)
            dmap.resize(seg_map.shape[0], seg_map.shape[1])

            # Count validity
            cond1 = seg_map == PERSON_SEGMENTATION
            cond2 = dmap.depthmap_arr > 0.1
            count_child_px = len(dmap.depthmap_arr[cond1])
            count_valid_px = len(dmap.depthmap_arr[cond1 & cond2])
            valid.append(count_valid_px / count_child_px)
        except Exception:
            continue
    value = 0
    if len(valid) > 0:
        value = np.mean(valid)
    data.append(value)
@classmethod
def create_from_rgbd(cls,
                     depthmap_fpath: str,
                     rgb_fpath: str,
                     calibration_fpath: str) -> 'BodyPose':
    dmap = Depthmap.create_from_zip_absolute(depthmap_fpath, 0, calibration_fpath)
    return cls(dmap, rgb_fpath)
def test_parse_header_invalid_device_pose(caplog):
    caplog.set_level(logging.INFO)
    header = '240x180_0.001_7_0.5_-0.5_0.5_0.5_0.0_-0.0_-0.0'
    Depthmap.parse_header(header)
    assert 'device_pose looks wrong' in caplog.text