def test_constructor_sets_properties(self):
    """Each constructor argument can be read back through its property."""
    subject = cam_intr.CameraIntrinsics(
        width=800, height=600,
        fx=763.1, fy=759.2,
        cx=400, cy=300,
        skew=0.1,
        k1=0.01, k2=0.002, k3=0.2,
        p1=0.09, p2=-0.02)

    # (expected value, property name). The skew is read back as `s`.
    expectations = (
        (800, 'width'),
        (600, 'height'),
        (763.1, 'fx'),
        (759.2, 'fy'),
        (400, 'cx'),
        (300, 'cy'),
        (0.1, 's'),
        (0.01, 'k1'),
        (0.002, 'k2'),
        (0.2, 'k3'),
        (0.09, 'p1'),
        (-0.02, 'p2'),
    )
    for expected, property_name in expectations:
        self.assertEqual(expected, getattr(subject, property_name))
def get_camera_calibration(sensor_yaml_path):
    """
    Load a camera's extrinsics and intrinsics from a sensor yaml file.

    The file is expected to contain 'T_BS' (with a flat 'data' sequence),
    'resolution', 'intrinsics' and 'distortion_coefficients' entries.

    :param sensor_yaml_path: Path of the sensor yaml file to read
    :return: A tuple of (extrinsics transform, camera intrinsics)
    """
    with open(sensor_yaml_path, 'r') as sensor_file:
        sensor_data = yaml.load(sensor_file, YamlLoader)

    # The first 16 values of the flat 'data' sequence are laid out row by row
    # into a 4x4 matrix.
    flat = sensor_data['T_BS']['data']
    extrinsics = tf.Transform(np.array([
        [flat[4 * row + col] for col in range(4)]
        for row in range(4)
    ]))

    # 'resolution' is read as (width, height).
    resolution = sensor_data['resolution']
    width, height = resolution[0], resolution[1]

    # 'intrinsics' is read as (fx, fy, cx, cy).
    pinhole = sensor_data['intrinsics']
    fx, fy, cx, cy = pinhole[0], pinhole[1], pinhole[2], pinhole[3]

    # 'distortion_coefficients' is read as (k1, k2, p1, p2).
    distortion = sensor_data['distortion_coefficients']
    k1, k2, p1, p2 = distortion[0], distortion[1], distortion[2], distortion[3]

    intrinsics = cam_intr.CameraIntrinsics(
        width=width, height=height,
        fx=fx, fy=fy,
        cx=cx, cy=cy,
        k1=k1, k2=k2,
        p1=p1, p2=p2
    )
    return extrinsics, intrinsics
def test_vertical_fov(self):
    """
    The vertical field of view reported by the intrinsics matches the angle
    the focal length was derived from.
    """
    rad_fov = 35.4898465 * np.pi / 180
    # Focal length (in pixels) that gives a 600 pixel tall image this fov.
    focal_length = 600 * 1 / (2 * np.tan(rad_fov / 2))
    subject = cam_intr.CameraIntrinsics(
        width=800, height=600,
        fx=focal_length, fy=focal_length,
        cx=400, cy=300)
    # The angle goes through tan() here and has to be recovered from the focal
    # length again, so compare with a tolerance rather than bit-for-bit:
    # exact float equality depends on the platform's math library rounding.
    self.assertAlmostEqual(rad_fov, subject.vertical_fov)
def test_kinect_noise(self):
    """
    Visual check of the kinect depth noise model.

    Builds camera intrinsics for the test images (90 degree field of view,
    principal point at the image centre), runs the noise model on the
    ground-truth depth pair and displays the normalised result in an OpenCV
    window until a key is pressed.
    """
    ground_truth_depth_left, ground_truth_depth_right = get_test_images()
    height = ground_truth_depth_left.shape[0]
    width = ground_truth_depth_left.shape[1]

    # Focal length as a fraction of the image width, fov / 2 = pi / 4
    focal_length = 1 / (2 * np.tan(np.pi / 4))
    camera_intrinsics = cam_intr.CameraIntrinsics(
        width,
        height,
        focal_length * width,
        focal_length * width,
        0.5 * width,
        # The vertical principal point is half the image HEIGHT; using the
        # width here puts it off-centre for any non-square image.
        0.5 * height)

    noisy_depth = depth_noise.kinect_depth_model(
        ground_truth_depth_left, ground_truth_depth_right, camera_intrinsics)
    cv2.imshow('test depth linear', noisy_depth / np.max(noisy_depth))
    cv2.waitKey(0)
def test_serialize_and_deserialize_works_with_minimal_parameters(self):
    """
    Intrinsics built from only the six leading positional arguments survive
    serialization round trips unchanged.
    """
    original = cam_intr.CameraIntrinsics(800, 600, 513.2, 152.3, 400, 300)
    original_serialized = original.serialize()

    restored = cam_intr.CameraIntrinsics.deserialize(original_serialized)
    restored_serialized = restored.serialize()
    self.assertEqual(original, restored)
    self.assertEqual(original_serialized, restored_serialized)

    # Test that repeated serialization and deserialization does not degrade the information
    for _ in range(100):
        restored = cam_intr.CameraIntrinsics.deserialize(restored_serialized)
        restored_serialized = restored.serialize()
        self.assertEqual(original, restored)
        self.assertEqual(original_serialized, restored_serialized)
def test_serialize_and_deserialise(self):
    """
    Fully specified intrinsics survive serialization round trips unchanged.
    """
    reference = cam_intr.CameraIntrinsics(
        800, 600, 763.1, 759.2, 400, 300,
        0.1, 0.01, 0.002, 0, 0.01, -0.02)
    reference_serialized = reference.serialize()

    # One initial round trip plus 100 repeats, each starting from the output
    # of the previous one, to test that repeated serialization and
    # deserialization does not degrade the information.
    latest_serialized = reference_serialized
    for _ in range(1 + 100):
        restored = cam_intr.CameraIntrinsics.deserialize(latest_serialized)
        latest_serialized = restored.serialize()
        self.assertEqual(reference, restored)
        self.assertEqual(reference_serialized, latest_serialized)
def test_serialize_and_deserialize_works_with_minimal_parameters(self):
    """
    Image metadata built from only a source type, hash and intrinsics
    survives serialization round trips unchanged.
    """
    reference = imeta.ImageMetadata(
        source_type=imeta.ImageSourceType.SYNTHETIC,
        hash_=b'\x1f`\xa8\x8aR\xed\x9f\x0b',
        intrinsics=cam_intr.CameraIntrinsics(800, 600, 652.2, 291, 142.2, 614.4))
    reference_serialized = reference.serialize()

    # One initial round trip plus 100 repeats, each starting from the output
    # of the previous one, to test that repeated serialization and
    # deserialization does not degrade the information.
    latest_serialized = reference_serialized
    for _ in range(1 + 100):
        restored = imeta.ImageMetadata.deserialize(latest_serialized)
        latest_serialized = restored.serialize()
        self.assert_metadata_equal(reference, restored)
        self.assertEqual(reference_serialized, latest_serialized)
def get_camera_intrinsics(self):
    """
    Get the current camera intrinsics from the simulator, based on its fov and aspect ratio.

    :return: Camera intrinsics at the current resolution, with fx equal to fy
    and the principal point at the centre of the image
    """
    rad_fov = np.pi * self.field_of_view / 180
    # Focal length as a fraction of the dimension the fov is measured along
    unit_focal_length = 1 / (2 * np.tan(rad_fov / 2))

    width, height = self._resolution[0], self._resolution[1]

    # In unreal 4, field of view is whichever is the larger dimension
    # See: https://answers.unrealengine.com/questions/36550/perspective-camera-and-field-of-view.html
    # Wider than tall: fov is horizontal. Otherwise: fov is vertical.
    fov_dimension = width if width > height else height
    focal_length = unit_focal_length * fov_dimension

    return cam_intr.CameraIntrinsics(
        width=width,
        height=height,
        fx=focal_length,
        fy=focal_length,
        cx=0.5 * width,
        cy=0.5 * height)
def test_is_labels_available_is_true_iff_all_images_have_bounding_boxes(self):
    """
    Labels are reported as available for the fixture images, and as
    unavailable once an image with no labelled objects is added.
    """
    sequential = core.sequence_type.ImageSequenceType.SEQUENTIAL

    # The fixture images on their own
    labelled_collection = ic.ImageCollection(
        images=self.images,
        type_=sequential,
        db_client_=self.create_mock_db_client())
    self.assertTrue(labelled_collection.is_labels_available)

    # An extra image, at timestamp 1.7, whose metadata has no labelled objects
    unlabelled_metadata = imeta.ImageMetadata(
        hash_=b'\xf1\x9a\xe2|' + np.random.randint(0, 0xFFFFFFFF).to_bytes(4, 'big'),
        source_type=imeta.ImageSourceType.SYNTHETIC,
        camera_pose=tf.Transform(
            location=(800, 2 + np.random.uniform(-1, 1), 3),
            rotation=(4, 5, 6, 7 + np.random.uniform(-4, 4))),
        intrinsics=cam_intr.CameraIntrinsics(800, 600, 550.2, 750.2, 400, 300),
        environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
        light_level=imeta.LightingLevel.WELL_LIT,
        time_of_day=imeta.TimeOfDay.DAY,
        lens_focal_distance=5,
        aperture=22,
        simulation_world='TestSimulationWorld',
        lighting_model=imeta.LightingModel.LIT,
        texture_mipmap_bias=1,
        normal_maps_enabled=2,
        roughness_enabled=True,
        geometry_decimation=0.8,
        procedural_generation_seed=16234,
        labelled_objects=[],
        average_scene_depth=90.12)
    # Combined with the fixture images through du.defaults
    mixed_images = du.defaults(
        {1.7: make_image(metadata=unlabelled_metadata)},
        self.images)
    mixed_collection = ic.ImageCollection(
        type_=sequential,
        images=mixed_images,
        db_client_=self.create_mock_db_client())
    self.assertFalse(mixed_collection.is_labels_available)
def test_preserves_other_metadata(self):
    """
    Augmenting an image leaves the metadata properties listed at the end of
    this test equal to those of the input image.
    """
    data = np.array([list(range(i, i + 100)) for i in range(100)])

    # (class name, bounding box, label colour, location, rotation, object id)
    object_specs = (
        ('cup', (142, 280, 54, 78), (2, 227, 34),
         (-246, 468, 4), (0.2, 0.3, 0.4), 'LabelledObject-68478'),
        ('car', (542, 83, 63, 123), (26, 12, 212),
         (61, -717, 161), (0.7, 0.6, 0.3), 'LabelledObject-8246'),
        ('cow', (349, 672, 124, 208), (162, 134, 163),
         (286, -465, -165), (0.9, 0.1, 0.5), 'LabelledObject-56485'),
    )
    labelled_objects = [
        imeta.LabelledObject(
            class_names=(class_name, ),
            bounding_box=bounding_box,
            label_color=label_color,
            relative_pose=tf.Transform(location=location, rotation=rotation),
            object_id=object_id)
        for class_name, bounding_box, label_color, location, rotation, object_id
        in object_specs
    ]

    metadata = imeta.ImageMetadata(
        hash_=b'\x04\xe2\x1f\x3d$\x7c\x116',
        source_type=imeta.ImageSourceType.SYNTHETIC,
        environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
        light_level=imeta.LightingLevel.WELL_LIT,
        time_of_day=imeta.TimeOfDay.DAY,
        camera_pose=tf.Transform((1, 3, 4), (0.2, 0.8, 0.2, -0.7)),
        right_camera_pose=tf.Transform((-10, -20, -30), (0.9, -0.7, 0.5, -0.3)),
        intrinsics=cam_intr.CameraIntrinsics(
            data.shape[1], data.shape[0], 147.2, 123.3, 420, 215),
        right_intrinsics=cam_intr.CameraIntrinsics(
            data.shape[1], data.shape[0], 168.2, 123.3, 420, 251),
        lens_focal_distance=5,
        aperture=22,
        simulation_world='TestSimulationWorld',
        lighting_model=imeta.LightingModel.LIT,
        texture_mipmap_bias=1,
        normal_maps_enabled=True,
        roughness_enabled=True,
        geometry_decimation=0.8,
        procedural_generation_seed=16234,
        labelled_objects=labelled_objects,
        average_scene_depth=90.12)
    image = core.image.Image(data=data, metadata=metadata)

    result = self.do_augment(image)

    # Properties that must come through unchanged, checked in this order
    preserved_properties = (
        'source_type',
        'environment_type',
        'light_level',
        'time_of_day',
        'height',
        'width',
        'camera_pose',
        'right_camera_pose',
        'lens_focal_distance',
        'aperture',
        'simulation_world',
        'lighting_model',
        'texture_mipmap_bias',
        'normal_maps_enabled',
        'roughness_enabled',
        'geometry_decimation',
        'procedural_generation_seed',
        'average_scene_depth',
    )
    for property_name in preserved_properties:
        self.assertEqual(getattr(result.metadata, property_name),
                         getattr(image.metadata, property_name))
def build_image_metadata(im_data, ground_truth_depth_data, camera_pose, metadata, right_camera_pose=None):
    """
    Construct an image metadata object from the reference images and a metadata dict.

    The 'World Name', 'Material Properties' and 'Geometry Detail' keys are
    deleted from the metadata dict after they have been read, so that the
    remaining values are 'additional metadata'. 'World Information' is read
    as well, but is left in place.

    :param im_data: The image data
    :param ground_truth_depth_data: Ground-truth depth, if available.
    :param camera_pose: The camera pose
    :param metadata: The metadata dict, modified in place
    :param right_camera_pose: The pose of the right stereo camera, if available
    :return: The image metadata object
    """
    height, width = im_data.shape[0], im_data.shape[1]

    # Calculate focal length from fov, np.pi / 4 (rad) = 90 / 2 (deg) = fov / 2
    unit_focal_length = 1 / (2 * np.tan(np.pi / 4))
    # In unreal 4, field of view is whichever is the larger dimension
    # See: https://answers.unrealengine.com/questions/36550/perspective-camera-and-field-of-view.html
    # Wider than tall: fov is horizontal. Otherwise: fov is vertical.
    focal_length = unit_focal_length * (width if width > height else height)

    camera_intrinsics = intrins.CameraIntrinsics(
        width=width,
        height=height,
        fx=focal_length,
        fy=focal_length,
        cx=0.5 * width,
        cy=0.5 * height)

    image_hash = xxhash.xxh64(np.ascontiguousarray(im_data)).digest()

    # The right camera shares the left camera's intrinsics when it exists
    if right_camera_pose is not None:
        right_intrinsics = camera_intrinsics
    else:
        right_intrinsics = None

    # Values pulled out of the metadata dict
    world_name = metadata['World Name']
    material_properties = metadata['Material Properties']
    mipmap_bias = int(material_properties['BaseMipMapBias'])
    normal_maps_enabled = int(material_properties['NormalQuality']) != 0
    roughness_enabled = int(material_properties['RoughnessQuality']) != 0
    forced_lod = int(metadata['Geometry Detail']['Forced LOD level'])
    path_generation = metadata['World Information']['Camera Path']['Path Generation']
    random_seed = int(path_generation['Random Seed'])

    if ground_truth_depth_data is not None:
        average_scene_depth = np.mean(ground_truth_depth_data)
    else:
        average_scene_depth = None

    image_metadata = imeta.ImageMetadata(
        hash_=image_hash,
        source_type=imeta.ImageSourceType.SYNTHETIC,
        camera_pose=camera_pose,
        right_camera_pose=right_camera_pose,
        environment_type=imeta.EnvironmentType.INDOOR,
        light_level=imeta.LightingLevel.EVENLY_LIT,
        time_of_day=imeta.TimeOfDay.DAY,
        intrinsics=camera_intrinsics,
        right_intrinsics=right_intrinsics,
        lens_focal_distance=None,
        aperture=None,
        simulation_world=world_name,
        lighting_model=imeta.LightingModel.LIT,
        texture_mipmap_bias=mipmap_bias,
        normal_maps_enabled=normal_maps_enabled,
        roughness_enabled=roughness_enabled,
        geometry_decimation=forced_lod,
        procedural_generation_seed=random_seed,
        labelled_objects=[],
        average_scene_depth=average_scene_depth)

    # Drop the consumed top-level keys from the caller's dict
    for consumed_key in ('World Name', 'Material Properties', 'Geometry Detail'):
        del metadata[consumed_key]
    return image_metadata
def import_dataset(labels_path, db_client, **kwargs):
    """
    Import a real-world dataset with labelled images.

    Each line of the labels file that splits on commas/spaces into exactly
    six fields is read as: image file, x1, y1, x2, y2, label. Other lines are
    skipped. Image paths are resolved relative to the labels file.

    :param labels_path: The labels file, or a folder to search for a '*.txt' labels file
    :param db_client: The database client, passed to the image collection builder
    :param kwargs: Additional arguments passed to the image metadata
    :return: The result of saving the image collection builder, or None if no
    labels file was found in the given folder
    """
    if os.path.isdir(labels_path):
        # Look in the given folder for possible labels files
        label_files = glob.glob(os.path.join(labels_path, '*.txt'))
        if not label_files:
            # Cannot find the labels file, return None
            return None
        labels_path = label_files[0]

    builder = dataset.image_collection_builder.ImageCollectionBuilder(db_client)
    builder.set_non_sequential()

    # FOV is 90 degrees which is pi / 2, so this factor is the same for every image
    unit_focal_length = 1 / (2 * np.tan(np.pi / 4))

    with open(labels_path, 'r') as labels_file:
        base_dir = os.path.dirname(labels_path)
        for line in labels_file:
            fields = re.split('[, ]', line)
            if len(fields) != 6:
                continue
            imfile, x1, y1, x2, y2, label = fields
            label = label.rstrip()

            # NOTE(review): cv2.imread is documented to return None for a file
            # it cannot read, which would fail inside cvtColor -- confirm that
            # the label files only reference readable images.
            im = cv2.imread(os.path.join(base_dir, imfile))
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

            # The focal length scales with the larger image dimension
            im_height, im_width = im.shape[0], im.shape[1]
            focal_length = unit_focal_length * (
                im_width if im_width > im_height else im_height)

            # Corner coordinates are converted to (x, y, width, height)
            left, top, right, bottom = int(x1), int(y1), int(x2), int(y2)
            labelled_object = imeta.LabelledObject(
                class_names=(label.lower(), ),
                bounding_box=(left, top, right - left, bottom - top),
                # This is so I can refer to it later, matches Unreal name
                object_id='StarbucksCup_170'
            )

            image_metadata = imeta.ImageMetadata(
                hash_=xxhash.xxh64(im).digest(),
                source_type=imeta.ImageSourceType.REAL_WORLD,
                intrinsics=cam_intr.CameraIntrinsics(
                    width=im_width,
                    height=im_height,
                    fx=focal_length,
                    fy=focal_length,
                    cx=0.5 * im_width,
                    cy=0.5 * im_height),
                camera_pose=tf.Transform(),
                labelled_objects=(labelled_object, ),
                **kwargs)
            builder.add_image(core.image_entity.ImageEntity(
                data=im,
                metadata=image_metadata,
                additional_metadata=None))
    return builder.save()
def test_clone(self):
    """
    A plain clone equals the original, and a clone with one overridden
    constructor argument differs from the original in that property only.
    """
    # (class name, bounding box, label colour, location, rotation, object id)
    cup = ('cup', (142, 280, 54, 78), (2, 227, 34),
           (-246, 468, 4), (0.2, 0.3, 0.4), 'LabelledObject-68478')
    car = ('car', (542, 83, 63, 123), (26, 12, 212),
           (61, -717, 161), (0.7, 0.6, 0.3), 'LabelledObject-8246')
    cow = ('cow', (349, 672, 124, 208), (162, 134, 163),
           (286, -465, -165), (0.9, 0.1, 0.5), 'LabelledObject-56485')
    # Single-field variations of the objects above
    cat = ('cat', ) + car[1:]
    moved_cup = cup[:1] + ((142, 12, 54, 78), ) + cup[2:]
    recoloured_cow = cow[:2] + ((162, 134, 255), ) + cow[3:]

    def build_objects(*specs):
        # Make a tuple of fresh labelled objects from the given specs
        return tuple(
            imeta.LabelledObject(
                class_names=(class_name, ),
                bounding_box=bounding_box,
                label_color=label_color,
                relative_pose=tf.Transform(location=location, rotation=rotation),
                object_id=object_id)
            for class_name, bounding_box, label_color, location, rotation, object_id
            in specs)

    # Alternative values for each constructor argument
    alt_metadata = {
        'hash_': [b'\x1f`\xa8\x8aR\xed\x9f\x0b'],
        'source_type': [imeta.ImageSourceType.REAL_WORLD],
        'environment_type': [
            imeta.EnvironmentType.INDOOR,
            imeta.EnvironmentType.OUTDOOR_URBAN,
            imeta.EnvironmentType.OUTDOOR_LANDSCAPE
        ],
        'light_level': [
            imeta.LightingLevel.PITCH_BLACK,
            imeta.LightingLevel.DIM,
            imeta.LightingLevel.EVENLY_LIT,
            imeta.LightingLevel.BRIGHT
        ],
        'time_of_day': [
            imeta.TimeOfDay.DAWN,
            imeta.TimeOfDay.MORNING,
            imeta.TimeOfDay.AFTERNOON,
            imeta.TimeOfDay.TWILIGHT,
            imeta.TimeOfDay.NIGHT
        ],
        'camera_pose': [tf.Transform((12, 13, 14), (-0.5, 0.3, 0.8, -0.9))],
        'right_camera_pose': [tf.Transform((11, 15, 19), (-0.2, 0.4, 0.6, -0.8))],
        'intrinsics': [cam_intr.CameraIntrinsics(900, 910, 894.7, 861.2, 640, 360)],
        'right_intrinsics': [cam_intr.CameraIntrinsics(900, 890, 760.45, 405.1, 640, 360)],
        'lens_focal_distance': [22],
        'aperture': [1.2],
        'simulator': [bson.ObjectId()],
        'simulation_world': ['TestSimulationWorld2'],
        'lighting_model': [imeta.LightingModel.UNLIT],
        'texture_mipmap_bias': [2],
        'normal_maps_enabled': [False],
        'roughness_enabled': [False],
        'geometry_decimation': [0.3],
        'procedural_generation_seed': [7329],
        'average_scene_depth': [102.33],
        'base_image': [mock.create_autospec(core.image.Image)],
        'transformation_matrix': [np.random.uniform(0, 1, (3, 3))],
        'labelled_objects': [
            tuple(),
            build_objects(cup, cat, cow),
            build_objects(moved_cup, car, cow),
            build_objects(cup, car, recoloured_cow)
        ]
    }

    # (constructor argument, property it is read back through), in the order
    # the properties are checked. Arguments in alt_metadata with no entry
    # here only have the "nothing else changed" checks applied.
    checked_properties = (
        ('hash_', 'hash'),
        ('source_type', 'source_type'),
        ('environment_type', 'environment_type'),
        ('light_level', 'light_level'),
        ('time_of_day', 'time_of_day'),
        ('camera_pose', 'camera_pose'),
        ('right_camera_pose', 'right_camera_pose'),
        ('intrinsics', 'camera_intrinsics'),
        ('right_intrinsics', 'right_camera_intrinsics'),
        ('lens_focal_distance', 'lens_focal_distance'),
        ('aperture', 'aperture'),
        ('simulation_world', 'simulation_world'),
        ('lighting_model', 'lighting_model'),
        ('texture_mipmap_bias', 'texture_mipmap_bias'),
        ('normal_maps_enabled', 'normal_maps_enabled'),
        ('roughness_enabled', 'roughness_enabled'),
        ('geometry_decimation', 'geometry_decimation'),
        ('procedural_generation_seed', 'procedural_generation_seed'),
        ('labelled_objects', 'labelled_objects'),
        ('average_scene_depth', 'average_scene_depth'),
    )

    a = self.make_metadata()
    b = a.clone()
    self.assert_metadata_equal(a, b)

    # Change single keys, and make sure it is no longer equal
    for key, values in alt_metadata.items():
        for val in values:
            b = a.clone(**{key: val})
            for argument, property_name in checked_properties:
                if key == argument:
                    # The overridden property takes the new value
                    self.assertEqual(val, getattr(b, property_name))
                    self.assertNotEqual(getattr(a, property_name),
                                        getattr(b, property_name))
                else:
                    # Every other property is carried over
                    self.assertEqual(getattr(a, property_name),
                                     getattr(b, property_name))
                    if argument == 'intrinsics':
                        # Image size is only compared while the intrinsics
                        # are unchanged
                        self.assertEqual(a.width, b.width)
                        self.assertEqual(a.height, b.height)
def make_metadata(self, **kwargs):
    """
    Make an image metadata object for testing.

    :param kwargs: Constructor arguments, combined through du.defaults with
    the fixed values below
    :return: A new ImageMetadata
    """
    # (class name, bounding box, label colour, location, rotation, object id)
    object_specs = (
        ('cup', (142, 280, 54, 78), (2, 227, 34),
         (-246, 468, 4), (0.2, 0.3, 0.4), 'LabelledObject-68478'),
        ('car', (542, 83, 63, 123), (26, 12, 212),
         (61, -717, 161), (0.7, 0.6, 0.3), 'LabelledObject-8246'),
        ('cow', (349, 672, 124, 208), (162, 134, 163),
         (286, -465, -165), (0.9, 0.1, 0.5), 'LabelledObject-56485'),
    )
    default_objects = [
        imeta.LabelledObject(
            class_names=(class_name, ),
            bounding_box=bounding_box,
            label_color=label_color,
            relative_pose=tf.Transform(location=location, rotation=rotation),
            object_id=object_id)
        for class_name, bounding_box, label_color, location, rotation, object_id
        in object_specs
    ]

    default_values = {
        'hash_': b'\xa5\xc9\x08\xaf$\x0b\x116',
        'source_type': imeta.ImageSourceType.SYNTHETIC,
        'environment_type': imeta.EnvironmentType.INDOOR_CLOSE,
        'light_level': imeta.LightingLevel.WELL_LIT,
        'time_of_day': imeta.TimeOfDay.DAY,
        'camera_pose': tf.Transform((1, 3, 4), (0.2, 0.8, 0.2, -0.7)),
        'right_camera_pose': tf.Transform((-10, -20, -30), (0.9, -0.7, 0.5, -0.3)),
        'intrinsics': cam_intr.CameraIntrinsics(700, 700, 654.2, 753.3, 400, 300),
        'right_intrinsics': cam_intr.CameraIntrinsics(700, 710, 732.1, 612.3, 400, 300),
        'lens_focal_distance': 5,
        'aperture': 22,
        'simulator': bson.ObjectId('5a14cf0e36ed1e17a55f1e35'),
        'simulation_world': 'TestSimulationWorld',
        'lighting_model': imeta.LightingModel.LIT,
        'texture_mipmap_bias': 1,
        'normal_maps_enabled': True,
        'roughness_enabled': True,
        'geometry_decimation': 0.8,
        'procedural_generation_seed': 16234,
        'labelled_objects': default_objects,
        'average_scene_depth': 90.12,
        'base_image': self.parent_image,
        'transformation_matrix': np.array([
            [0.19882871, 0.58747441, 0.90084303],
            [0.6955363, 0.48193339, 0.09503605],
            [0.20549805, 0.6110534, 0.61145574]
        ])
    }
    kwargs = du.defaults(kwargs, default_values)
    return imeta.ImageMetadata(**kwargs)
def test_hash(self):
    """
    Identically constructed intrinsics hash the same, and changing a single
    constructor argument changes the hash.
    """
    base_args = (800, 600, 763.1, 759.2, 400, 300,
                 0.1, 0.01, 0.002, 0, 0.01, -0.02)
    entity1 = cam_intr.CameraIntrinsics(*base_args)

    entity2 = cam_intr.CameraIntrinsics(*base_args)
    self.assertEqual(hash(entity1), hash(entity2))

    # (argument position, replacement value), one change at a time.
    # Position 9 is not varied.
    single_changes = (
        (0, 810),
        (1, 610),
        (2, 561.2),
        (3, 142.2),
        (4, 600),
        (5, 200),
        (6, 0.2),
        (7, 0.03),
        (8, 0.017),
        (10, -0.19),
        (11, 0.8),
    )
    for position, replacement in single_changes:
        changed_args = list(base_args)
        changed_args[position] = replacement
        entity2 = cam_intr.CameraIntrinsics(*changed_args)
        self.assertNotEqual(hash(entity1), hash(entity2))
def setUp(self):
    """
    Create the fixtures: a stereo image with only colour data and metadata
    (self.image), and one with every data channel and additional metadata
    (self.full_image), along with the poses, metadata and arrays they are
    built from.
    """
    def random_uint8(*shape):
        # Uniform random values in [0, 255), truncated to uint8
        return np.asarray(np.random.uniform(0, 255, shape), dtype='uint8')

    def right_pose_for(left_pose):
        # Right camera at (0, 0, 15) relative to the left pose, same rotation
        return tf.Transform(
            location=left_pose.find_independent((0, 0, 15)),
            rotation=left_pose.rotation_quat(w_first=False),
            w_first=False)

    def build_metadata(left_pose, right_pose):
        # The two fixtures' metadata differ only in their camera poses
        return imeta.ImageMetadata(
            hash_=b'\x1f`\xa8\x8aR\xed\x9f\x0b',
            source_type=imeta.ImageSourceType.SYNTHETIC,
            camera_pose=left_pose,
            right_camera_pose=right_pose,
            intrinsics=cam_intr.CameraIntrinsics(32, 32, 17, 22, 16, 16),
            right_intrinsics=cam_intr.CameraIntrinsics(32, 32, 8, 12, 16, 16),
            environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
            light_level=imeta.LightingLevel.WELL_LIT,
            time_of_day=imeta.TimeOfDay.DAY,
            lens_focal_distance=5,
            aperture=22,
            simulation_world='TestSimulationWorld',
            lighting_model=imeta.LightingModel.LIT,
            texture_mipmap_bias=1,
            normal_maps_enabled=2,
            roughness_enabled=True,
            geometry_decimation=0.8,
            procedural_generation_seed=16234,
            labelled_objects=(
                imeta.LabelledObject(
                    class_names=('car',),
                    bounding_box=(12, 144, 67, 43),
                    label_color=(123, 127, 112),
                    relative_pose=tf.Transform((12, 3, 4), (0.5, 0.1, 1, 1.7)),
                    object_id='Car-002'),
                imeta.LabelledObject(
                    class_names=('cat',),
                    bounding_box=(125, 244, 117, 67),
                    label_color=(27, 89, 62),
                    relative_pose=tf.Transform((378, -1890, 38), (0.3, 1.12, 1.1, 0.2)),
                    object_id='cat-090')
            ),
            average_scene_depth=90.12)

    # Minimal stereo image
    self.left_pose = tf.Transform((1, 2, 3), (0.5, 0.5, -0.5, -0.5))
    self.right_pose = right_pose_for(self.left_pose)
    self.metadata = build_metadata(self.left_pose, self.right_pose)
    self.left_data = random_uint8(32, 32, 3)
    self.right_data = random_uint8(32, 32, 3)
    self.image = im.StereoImage(
        left_data=self.left_data,
        right_data=self.right_data,
        metadata=self.metadata)

    # Fully populated stereo image
    self.full_left_pose = tf.Transform((4, 5, 6), (-0.5, 0.5, -0.5, 0.5))
    # NOTE(review): this is derived from self.left_pose rather than
    # self.full_left_pose -- looks like a copy-paste slip, but the tests may
    # rely on it; confirm before changing.
    self.full_right_pose = right_pose_for(self.left_pose)
    self.full_metadata = build_metadata(self.full_left_pose, self.full_right_pose)

    self.full_left_data = random_uint8(32, 32, 3)
    self.full_right_data = random_uint8(32, 32, 3)
    self.left_gt_depth = random_uint8(32, 32)
    self.right_gt_depth = random_uint8(32, 32)
    self.left_depth = random_uint8(32, 32)
    self.right_depth = random_uint8(32, 32)
    self.left_labels = random_uint8(32, 32, 3)
    self.right_labels = random_uint8(32, 32, 3)
    self.left_normals = random_uint8(32, 32, 3)
    self.right_normals = random_uint8(32, 32, 3)

    self.full_image = im.StereoImage(
        left_data=self.full_left_data,
        right_data=self.full_right_data,
        left_depth_data=self.left_depth,
        right_depth_data=self.right_depth,
        left_ground_truth_depth_data=self.left_gt_depth,
        right_ground_truth_depth_data=self.right_gt_depth,
        left_labels_data=self.left_labels,
        right_labels_data=self.right_labels,
        left_world_normals_data=self.left_normals,
        right_world_normals_data=self.right_normals,
        metadata=self.full_metadata,
        additional_metadata={
            'Source': 'Generated',
            'Resolution': {'width': 1280, 'height': 720},
            'Material Properties': {
                'BaseMipMapBias': 0,
                'RoughnessQuality': True
            }
        }
    )
def make_image(**kwargs):
    """
    Make a mock image, randomly.

    Every default value is generated on each call, whether or not the
    matching keyword argument was supplied.

    :param kwargs: Fixed kwargs to the constructor
    :return: a new image object
    """
    def random_object(class_name, object_id, color_channels):
        # A labelled object with random bounding box, colour and pose. The
        # random draws happen in that order.
        return imeta.LabelledObject(
            class_names=(class_name, ),
            bounding_box=tuple(np.random.randint(0, 100, 4)),
            label_color=tuple(np.random.randint(0, 255, color_channels)),
            relative_pose=tf.Transform(
                np.random.uniform(-1000, 1000, 3),
                np.random.uniform(-1, 1, 4)),
            object_id=object_id)

    # Entries are evaluated top to bottom, which fixes the order in which
    # random numbers are drawn.
    default_values = {
        'id_': bson.objectid.ObjectId(),
        'data': np.random.randint(0, 255, (32, 32, 3), dtype='uint8'),
        'metadata': imeta.ImageMetadata(
            hash_=b'\x1f`\xa8\x8aR\xed\x9f\x0b',
            source_type=imeta.ImageSourceType.SYNTHETIC,
            camera_pose=tf.Transform(
                location=np.random.uniform(-1000, 1000, 3),
                rotation=np.random.uniform(-1, 1, 4)),
            intrinsics=cam_intr.CameraIntrinsics(800, 600, 782.5, 781.3, 320, 300),
            environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
            light_level=imeta.LightingLevel.WELL_LIT,
            time_of_day=imeta.TimeOfDay.DAY,
            lens_focal_distance=np.random.uniform(10, 10000),
            aperture=np.random.uniform(1, 22),
            simulation_world='TestSimulationWorld',
            lighting_model=imeta.LightingModel.LIT,
            texture_mipmap_bias=np.random.randint(0, 8),
            normal_maps_enabled=bool(np.random.randint(0, 2)),
            roughness_enabled=bool(np.random.randint(0, 2)),
            geometry_decimation=np.random.uniform(0, 1),
            procedural_generation_seed=np.random.randint(10000),
            labelled_objects=(
                random_object('car', 'Car-002', 3),
                # NOTE(review): the cat gets a 4-value label colour where the
                # car gets 3 -- confirm that this is intended.
                random_object('cat', 'cat-090', 4)),
            # NOTE(review): the single argument is the `low` bound of
            # np.random.uniform, with `high` left at its default -- confirm
            # the intended range.
            average_scene_depth=np.random.uniform(10000)),
        'additional_metadata': {
            'Source': 'Generated',
            'Resolution': {
                'width': 32,
                'height': 32
            },
            'Material Properties': {
                'BaseMipMapBias': 0,
                'RoughnessQuality': True
            }
        },
        'depth_data': np.random.uniform(0, 1, (32, 32)),
        'labels_data': np.random.uniform(0, 1, (32, 32, 3)),
        'world_normals_data': np.random.uniform(0, 1, (32, 32, 3))
    }
    kwargs = du.defaults(kwargs, default_values)
    return ie.ImageEntity(**kwargs)
def test_make_from_images(self):
    """
    StereoImage.make_from_images should combine a left and a right image:
    the additional metadata is compared against du.defaults(left, right), and
    each per-camera property of the stereo image must equal the matching
    property of the image it came from.
    """
    left_pose = tf.Transform((1, 2, 3), (0.5, 0.5, -0.5, -0.5))
    right_location = left_pose.find_independent((0, 0, 15))
    right_rotation = left_pose.rotation_quat(w_first=False)
    right_pose = tf.Transform(location=right_location,
                              rotation=right_rotation,
                              w_first=False)

    car = imeta.LabelledObject(
        class_names=('car',),
        bounding_box=(12, 144, 67, 43),
        label_color=(123, 127, 112),
        relative_pose=tf.Transform((12, 3, 4), (0.5, 0.1, 1, 1.7)),
        object_id='Car-002')
    cat = imeta.LabelledObject(
        class_names=('cat',),
        bounding_box=(125, 244, 117, 67),
        label_color=(27, 89, 62),
        relative_pose=tf.Transform((378, -1890, 38), (0.3, 1.12, 1.1, 0.2)),
        object_id='cat-090')

    # Both images deliberately share this one metadata object.
    metadata = imeta.ImageMetadata(
        hash_=b'\x1f`\xa8\x8aR\xed\x9f\x0b',
        source_type=imeta.ImageSourceType.SYNTHETIC,
        camera_pose=left_pose,
        right_camera_pose=right_pose,
        intrinsics=cam_intr.CameraIntrinsics(32, 32, 15, 21, 16, 16),
        right_intrinsics=cam_intr.CameraIntrinsics(32, 32, 13, 7, 16, 16),
        environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
        light_level=imeta.LightingLevel.WELL_LIT,
        time_of_day=imeta.TimeOfDay.DAY,
        lens_focal_distance=5,
        aperture=22,
        simulation_world='TestSimulationWorld',
        lighting_model=imeta.LightingModel.LIT,
        texture_mipmap_bias=1,
        normal_maps_enabled=2,
        roughness_enabled=True,
        geometry_decimation=0.8,
        procedural_generation_seed=16234,
        labelled_objects=(car, cat),
        average_scene_depth=90.12)

    left_additional = {
        'Source': 'Generated',
        'Resolution': {'width': 1280, 'height': 720},
        'Material Properties': {
            'BaseMipMapBias': 0,
            'RoughnessQuality': True
        }
    }
    left_image = im.Image(
        data=self.left_data,
        depth_data=self.left_depth,
        labels_data=self.left_labels,
        world_normals_data=self.left_normals,
        metadata=metadata,
        additional_metadata=left_additional)

    # The right image differs in its material properties and carries one
    # extra key, so the merged metadata has something to reconcile.
    right_additional = {
        'Source': 'Generated',
        'Resolution': {'width': 1280, 'height': 720},
        'Material Properties': {
            'BaseMipMapBias': 1,
            'RoughnessQuality': False
        },
        'skeletons': 'There is already one inside you'
    }
    right_image = im.Image(
        data=self.right_data,
        depth_data=self.right_depth,
        labels_data=self.right_labels,
        world_normals_data=self.right_normals,
        metadata=metadata,
        additional_metadata=right_additional)

    stereo_image = im.StereoImage.make_from_images(left_image, right_image)

    self.assertEqual(
        stereo_image.additional_metadata,
        du.defaults(left_image.additional_metadata,
                    right_image.additional_metadata))

    # Each stereo property is named '<side>_<property>' and must match the
    # same property on the image that supplied that side.
    compared_properties = (
        'camera_location',
        'camera_orientation',
        'data',
        'depth_data',
        'labels_data',
        'world_normals_data',
    )
    for side, source_image in (('left', left_image), ('right', right_image)):
        for property_name in compared_properties:
            self.assertNPEqual(
                getattr(stereo_image, side + '_' + property_name),
                getattr(source_image, property_name))
def make_stereo_image(index=1, **kwargs):
    """
    Build a mock StereoImageEntity with partly randomised contents.

    All default values are generated on every call, before they are merged
    with the overrides through du.defaults.

    :param index: Offsets the first location coordinate of both camera poses
    to 1 + 100 * index
    :param kwargs: Fixed constructor arguments, combined with the generated
    defaults through du.defaults (presumably the supplied values win - verify
    against du.defaults)
    :return: a new ie.StereoImageEntity
    """
    # The draws below are made in a fixed order; keep it when editing so that
    # seeded tests keep producing the same values.
    entity_id = bson.objectid.ObjectId()
    left_pixels = np.random.uniform(0, 255, (32, 32, 3))
    right_pixels = np.random.uniform(0, 255, (32, 32, 3))

    # Fixed 4-byte prefix followed by 4 random bytes.
    image_hash = b'\xf1\x9a\xe2|' + np.random.randint(0, 0xFFFFFFFF).to_bytes(4, 'big')

    pose_x = 1 + 100 * index
    left_pose = tf.Transform(
        location=(pose_x, 2 + np.random.uniform(-1, 1), 3),
        rotation=(4, 5, 6, 7 + np.random.uniform(-4, 4)))
    right_pose = tf.Transform(
        location=(pose_x, 12 + np.random.uniform(-1, 1), 3),
        rotation=(4, 5, 6, 7 + np.random.uniform(-4, 4)))

    car = imeta.LabelledObject(
        class_names=('car', ),
        bounding_box=(12, 144, 67, 43),
        label_color=(123, 127, 112),
        relative_pose=tf.Transform((12, 3, 4), (0.5, 0.1, 1, 1.7)),
        object_id='Car-002')
    cat = imeta.LabelledObject(
        class_names=('cat', ),
        bounding_box=(125, 244, 117, 67),
        label_color=(27, 89, 62),
        relative_pose=tf.Transform((378, -1890, 38), (0.3, 1.12, 1.1, 0.2)),
        object_id='cat-090')

    metadata = imeta.ImageMetadata(
        hash_=image_hash,
        source_type=imeta.ImageSourceType.SYNTHETIC,
        camera_pose=left_pose,
        right_camera_pose=right_pose,
        intrinsics=cam_intr.CameraIntrinsics(800, 600, 550.2, 750.2, 400, 300),
        right_intrinsics=cam_intr.CameraIntrinsics(800, 600, 550.2, 750.2, 400, 300),
        environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
        light_level=imeta.LightingLevel.WELL_LIT,
        time_of_day=imeta.TimeOfDay.DAY,
        lens_focal_distance=5,
        aperture=22,
        simulation_world='TestSimulationWorld',
        lighting_model=imeta.LightingModel.LIT,
        texture_mipmap_bias=1,
        normal_maps_enabled=2,
        roughness_enabled=True,
        geometry_decimation=0.8,
        procedural_generation_seed=16234,
        labelled_objects=(car, cat),
        average_scene_depth=90.12)

    default_args = {
        'id_': entity_id,
        'left_data': left_pixels,
        'right_data': right_pixels,
        'metadata': metadata,
        'additional_metadata': {
            'Source': 'Generated',
            'Resolution': {'width': 1280, 'height': 720},
            'Material Properties': {
                'BaseMipMapBias': 0,
                'RoughnessQuality': True
            }
        },
        'left_depth_data': np.random.uniform(0, 1, (32, 32)),
        'right_depth_data': np.random.uniform(0, 1, (32, 32)),
        'left_labels_data': np.random.uniform(0, 1, (32, 32, 3)),
        'right_labels_data': np.random.uniform(0, 1, (32, 32, 3)),
        'left_world_normals_data': np.random.uniform(0, 1, (32, 32, 3)),
        'right_world_normals_data': np.random.uniform(0, 1, (32, 32, 3))
    }
    merged_args = du.defaults(kwargs, default_args)
    return ie.StereoImageEntity(**merged_args)
def import_dataset(root_folder, db_client, sequence_number=0):
    """
    Load one KITTI odometry image sequence into the database as stereo images.

    Every frame of the colour stereo pair (cam2 / cam3) is added to an
    ImageCollectionBuilder together with its timestamp, and the builder is
    saved once all frames have been added.

    :param root_folder: Root of the KITTI odometry dataset, i.e. the folder
    handed to pykitti.odometry, which contains the 'sequences' folder
    :param db_client: The database client given to the ImageCollectionBuilder
    :param sequence_number: Number of the sequence to import, formatted as a
    two-digit folder name ('00', '01', ...)
    :return: The result of builder.save(), or None if the dataset root or the
    requested sequence folder does not exist
    """
    sequence_name = "{0:02}".format(sequence_number)

    # pykitti.odometry reads frames from <root>/sequences/<NN>. A missing root
    # implies a missing sequence folder, so one check covers both cases.
    # (The previous test, `not isdir(root) and isdir(root/NN)`, could never be
    # true, so missing data was never detected.)
    sequence_folder = os.path.join(root_folder, 'sequences', sequence_name)
    if not os.path.isdir(sequence_folder):
        return None

    data = pykitti.odometry(root_folder, sequence=sequence_name)
    builder = dataset.image_collection_builder.ImageCollectionBuilder(db_client)

    # dataset.calib:      Calibration data are accessible as a named tuple
    # dataset.timestamps: Timestamps are parsed into a list of timedelta objects
    # dataset.poses:      Generator to load ground truth poses T_w_cam0
    # dataset.camN:       Generator to load individual images from camera N
    # dataset.gray:       Generator to load monochrome stereo pairs (cam0, cam1)
    # dataset.rgb:        Generator to load RGB stereo pairs (cam2, cam3)
    # dataset.velo:       Generator to load velodyne scans as [x,y,z,reflectance]
    for left_image, right_image, timestamp, pose in zip(
            data.cam2, data.cam3, data.timestamps, data.poses):
        camera_pose = make_camera_pose(pose)
        # camera pose is for cam0, we want cam2, which is 6cm (0.06m) to the left
        camera_pose = camera_pose.find_independent(
            tf.Transform(location=(0, 0.06, 0), rotation=(0, 0, 0, 1), w_first=False))
        # Stereo offset is 0.54m (http://www.cvlibs.net/datasets/kitti/setup.php)
        right_camera_pose = camera_pose.find_independent(
            tf.Transform(location=(0, -0.54, 0), rotation=(0, 0, 0, 1), w_first=False))

        # Image arrays are indexed (row, column), so shape[0] is the height.
        camera_intrinsics = intrins.CameraIntrinsics(
            height=left_image.shape[0],
            width=left_image.shape[1],
            fx=data.calib.K_cam2[0, 0],
            fy=data.calib.K_cam2[1, 1],
            cx=data.calib.K_cam2[0, 2],
            cy=data.calib.K_cam2[1, 2])
        right_camera_intrinsics = intrins.CameraIntrinsics(
            height=right_image.shape[0],
            width=right_image.shape[1],
            fx=data.calib.K_cam3[0, 0],
            fy=data.calib.K_cam3[1, 1],
            cx=data.calib.K_cam3[0, 2],
            cy=data.calib.K_cam3[1, 2])

        builder.add_image(
            image=core.image_entity.StereoImageEntity(
                left_data=left_image,
                right_data=right_image,
                metadata=imeta.ImageMetadata(
                    hash_=xxhash.xxh64(left_image).digest(),
                    # Use the cam2-adjusted pose computed above, so the left
                    # pose is consistent with right_camera_pose, which was
                    # derived from it (previously the raw cam0 pose was stored).
                    camera_pose=camera_pose,
                    right_camera_pose=right_camera_pose,
                    intrinsics=camera_intrinsics,
                    right_intrinsics=right_camera_intrinsics,
                    source_type=imeta.ImageSourceType.REAL_WORLD,
                    environment_type=imeta.EnvironmentType.OUTDOOR_URBAN,
                    light_level=imeta.LightingLevel.WELL_LIT,
                    time_of_day=imeta.TimeOfDay.AFTERNOON,
                ),
                additional_metadata={
                    'dataset': 'KITTI',
                    'sequence': sequence_number
                }),
            timestamp=timestamp.total_seconds())
    return builder.save()