class DSO(VisionSystem):
    """
    Python wrapper for Direct Sparse Odometry (DSO)
    See https://github.com/JakobEngel/dso
    Bound to python using SWIG
    """
    rectification_mode = EnumField(RectificationMode, required=True)
    rectification_intrinsics = fields.EmbeddedDocumentField(CameraIntrinsics, required=True)

    columns = ColumnList(
        rectification_mode=attrgetter('rectification_mode'),
        undistort_mode=None,
        in_height=None,
        in_width=None,
        in_fx=None,
        in_fy=None,
        in_cx=None,
        in_cy=None,
        in_p1=None,
        in_p2=None,
        in_k1=None,
        in_k2=None,
        out_width=attrgetter('rectification_intrinsics.width'),
        out_height=attrgetter('rectification_intrinsics.height'),
        out_fx=lambda obj: obj.rectification_intrinsics.fx
        if obj.rectification_mode is RectificationMode.CALIB else float('nan'),
        out_fy=lambda obj: obj.rectification_intrinsics.fy
        if obj.rectification_mode is RectificationMode.CALIB else float('nan'),
        out_cx=lambda obj: obj.rectification_intrinsics.cx
        if obj.rectification_mode is RectificationMode.CALIB else float('nan'),
        out_cy=lambda obj: obj.rectification_intrinsics.cy
        if obj.rectification_mode is RectificationMode.CALIB else float('nan')
    )

    def __init__(self, *args, **kwargs):
        super(DSO, self).__init__(*args, **kwargs)
        self._intrinsics = None
        self._framerate = 30
        self._has_photometric_calibration = False

        self._undistorter = None
        self._output_wrapper = None
        self._system = None

        self._start_time = None
        self._image_index = 0
        self._frame_results = None
        self._processing_start_times = None

    @classmethod
    def is_deterministic(cls) -> StochasticBehaviour:
        """
        DSO is deterministic with multi-threading disabled.
        There is a bug in one of the multi-threaded accumulates that is order-dependent,
        so with multi-threading disabled, the system is deterministic.
        :return: StochasticBehaviour.DETERMINISTIC
        """
        return StochasticBehaviour.DETERMINISTIC

    def is_image_source_appropriate(self, image_source: ImageSource) -> bool:
        """
        Is the dataset appropriate for testing this vision system.
        DSO requires sequential images, and either a rectification mode that resizes
        the input, or input images that are already at a valid resolution.

        :param image_source: The source for images that this system will potentially be run with.
        :return: True iff the particular dataset is appropriate for this vision system.
        :rtype: bool
        """
        return image_source.sequence_type == ImageSequenceType.SEQUENTIAL and (
            self.rectification_mode is not RectificationMode.NONE or
            check_resolution(image_source.camera_intrinsics)
        )

    def get_columns(self) -> typing.Set[str]:
        """
        Get the set of available properties for this system. Pass these to "get_properties", below.
        :return:
        """
        return set(self.columns.keys())

    def get_properties(self, columns: typing.Iterable[str] = None,
                       settings: typing.Mapping[str, typing.Any] = None) -> typing.Mapping[str, typing.Any]:
        """
        Get the values of the requested properties
        :param columns: The columns to get the values of
        :param settings: The settings stored in the trial result.
        :return:
        """
        if columns is None:
            columns = self.columns.keys()
        if settings is None:
            settings = {}
        return {
            col_name: self.get_property(col_name, settings)
            for col_name in columns
            if col_name in self.columns
        }

    def get_property(self, column_name: str, settings: typing.Mapping[str, typing.Any]):
        """
        Get the value of a particular column on this model, given some settings.
        Used in get_properties, to handle various special cases.
        :param column_name:
        :param settings:
        :return:
        """
        if column_name == 'rectification_mode':
            return self.rectification_mode
        elif self.rectification_mode != RectificationMode.CALIB and \
                column_name in {'out_fx', 'out_fy', 'out_cx', 'out_cy'}:
            return float('nan')
        elif column_name in settings:
            return settings[column_name]
        return self.columns.get_value(self, column_name)

    def set_camera_intrinsics(self, camera_intrinsics: CameraIntrinsics, average_timestep: float) -> None:
        """
        Set the intrinsics of the camera
        :param camera_intrinsics: A metadata.camera_intrinsics.CameraIntrinsics object
        :param average_timestep: The average time interval between frames. Used to set the framerate for DSO.
        :return:
        """
        if self._system is None:
            self._intrinsics = camera_intrinsics
            self._framerate = 1 / average_timestep

    def start_trial(self, sequence_type: ImageSequenceType, seed: int = 0) -> None:
        """
        Start a trial with this system.
        After calling this, we can feed images to the system.
        When the trial is complete, call finish_trial to get the result.
        :param sequence_type: Are the provided images part of a sequence, or just unassociated pictures.
        :param seed: A random seed. Not used, but may be given.
        :return: void
        """
        if sequence_type is not ImageSequenceType.SEQUENTIAL:
            raise RuntimeError("Cannot start trial with {0} image source".format(sequence_type.name))
        if self._intrinsics is None:
            raise RuntimeError("Cannot start trial, intrinsics have not been provided yet")

        self._start_time = time.time()
        self._frame_results = {}
        self._processing_start_times = {}

        # Figure out mode and preset for DSO
        # mode:
        #   mode = 0 - use iff a photometric calibration exists (e.g. TUM monoVO dataset).
        #   mode = 1 - use iff NO photometric calibration exists (e.g. ETH EuRoC MAV dataset).
        #   mode = 2 - use iff images are not photometrically distorted (e.g. synthetic datasets).
        # preset:
        #   preset = 0 - default settings (2k pts etc.), not enforcing real-time execution
        #   preset = 1 - default settings (2k pts etc.), enforcing 1x real-time execution
        #   WARNING: These two overwrite image resolution with 424 x 320.
        #   preset = 2 - fast settings (800 pts etc.), not enforcing real-time execution.
        #   preset = 3 - fast settings (800 pts etc.), enforcing 5x real-time execution
        mode = 1
        preset = 0
        dso_configure(preset=preset, mode=mode, quiet=True, nolog=True)

        # Build the undistorter, this will preprocess images and remove distortion
        if self.rectification_mode is RectificationMode.NONE:
            # For no undistortion, simply pass through; the output resolution always matches the input
            self._undistorter = make_undistort_from_mode(
                self._intrinsics, self.rectification_mode, self._intrinsics.width, self._intrinsics.height)
        elif self.rectification_mode is RectificationMode.CALIB:
            # CALIB rectification uses the full intrinsics
            self._undistorter = make_undistort_from_out_intrinsics(self._intrinsics, self.rectification_intrinsics)
        else:
            # Otherwise, build an undistorter that crops to the configured fixed resolution
            self._undistorter = make_undistort_from_mode(
                self._intrinsics, self.rectification_mode,
                self.rectification_intrinsics.width, self.rectification_intrinsics.height
            )
        if mode != 0:
            self._undistorter.setNoPhotometricCalibration()
        self._undistorter.applyGlobalConfig()   # Need to do this to set camera intrinsics

        # Make an output wrapper to accumulate output information
        self._output_wrapper = DSOOutputWrapper()

        # Build the system itself.
        self._system = DSOSystem()
        self._system.outputWrapper.append(self._output_wrapper)

        self._start_time = time.time()
        self._image_index = 0

    def process_image(self, image: Image, timestamp: float) -> None:
        """
        Process an image as part of the current run.
        Should automatically start a new trial if none is currently started.
        :param image: The image object for this frame
        :param timestamp: A timestamp or index associated with this image. Sometimes None.
        :return: void
        """
        if self._undistorter is None:
            raise RuntimeError("Cannot process image, trial has not started yet. Call 'start_trial'")
        image_data = image_utils.to_uint_image(image_utils.convert_to_grey(image.pixels))
        dso_img = self._undistorter.undistort_greyscale(image_data, 0, timestamp, 1.0)
        self._processing_start_times[timestamp] = time.time()
        self._system.addActiveFrame(dso_img, self._image_index)
        self._image_index += 1
        self._frame_results[timestamp] = FrameResult(
            timestamp=timestamp,
            image=image.pk,
            pose=image.camera_pose,
            tracking_state=TrackingState.NOT_INITIALIZED,
            processing_time=np.nan
        )

    def finish_trial(self) -> SLAMTrialResult:
        """
        End the current trial, returning a trial result.
        Return none if no trial is started.
        :return:
        :rtype TrialResult:
        """
        if self._system is None:
            raise RuntimeError("Cannot finish trial, no trial started. Call 'start_trial'")

        # Wait for the system to finish
        self._system.blockUntilMappingIsFinished()

        # Collate the frame results
        unrecognised_timestamps = set()
        for timestamp, trans, rot, finish_time in self._output_wrapper.frame_deltas:
            if timestamp in self._frame_results:
                self._frame_results[timestamp].estimated_pose = make_pose(trans, rot)
                self._frame_results[timestamp].processing_time = \
                    finish_time - self._processing_start_times[timestamp]
                self._frame_results[timestamp].tracking_state = TrackingState.OK
            else:
                unrecognised_timestamps.add(timestamp)
        if len(unrecognised_timestamps) > 0:
            valid_timestamps = np.array(list(self._frame_results.keys()))
            logging.getLogger(__name__).warning("Got inconsistent timestamps:\n" + '\n'.join(
                '{0} (closest was {1})'.format(
                    unrecognised_timestamp,
                    _find_closest(unrecognised_timestamp, valid_timestamps)
                )
                for unrecognised_timestamp in unrecognised_timestamps
            ))

        # Organize the tracking state; it is NOT_INITIALIZED until the first frame is found, then it is LOST
        found = False
        for timestamp in sorted(self._frame_results.keys()):
            if self._frame_results[timestamp].tracking_state is TrackingState.OK:
                found = True
            elif found and self._frame_results[timestamp].tracking_state is TrackingState.NOT_INITIALIZED:
                self._frame_results[timestamp].tracking_state = TrackingState.LOST

        # Clean up
        self._undistorter = None
        self._system = None
        self._output_wrapper = None

        result = SLAMTrialResult(
            system=self.pk,
            success=len(self._frame_results) > 0,
            results=[self._frame_results[timestamp] for timestamp in sorted(self._frame_results.keys())],
            has_scale=False,
            settings=self.make_settings()
        )
        result.run_time = time.time() - self._start_time
        self._frame_results = None
        self._start_time = None
        return result

    def make_settings(self):
        undistort_mode = "Pinhole" if (
                self._intrinsics.k1 == 0 and self._intrinsics.k2 == 0 and
                self._intrinsics.p1 == 0 and self._intrinsics.p2 == 0) else "RadTan"
        settings = {
            'rectification_mode': self.rectification_mode.name,
            'undistort_mode': undistort_mode,
            'in_width': self._intrinsics.width,
            'in_height': self._intrinsics.height,
            'in_fx': self._intrinsics.fx,
            'in_fy': self._intrinsics.fy,
            'in_cx': self._intrinsics.cx,
            'in_cy': self._intrinsics.cy,
            'in_p1': self._intrinsics.p1,
            'in_p2': self._intrinsics.p2,
            'in_k1': self._intrinsics.k1,
            'in_k2': self._intrinsics.k2
        }
        if self.rectification_mode is RectificationMode.NONE:
            settings['out_width'] = self._intrinsics.width
            settings['out_height'] = self._intrinsics.height
        else:
            settings['out_width'] = self.rectification_intrinsics.width
            settings['out_height'] = self.rectification_intrinsics.height
        if self.rectification_mode is RectificationMode.CALIB:
            settings['out_fx'] = self.rectification_intrinsics.fx
            settings['out_fy'] = self.rectification_intrinsics.fy
            settings['out_cx'] = self.rectification_intrinsics.cx
            settings['out_cy'] = self.rectification_intrinsics.cy
        return settings

    @classmethod
    def get_instance(
            cls,
            rectification_mode: RectificationMode = None,
            rectification_intrinsics: CameraIntrinsics = None
    ) -> 'DSO':
        """
        Get an instance of this vision system, with some parameters, pulling from the database if possible,
        or construct a new one if needed.
        It is the responsibility of subclasses to ensure that as few instances of each system as possible
        exist within the database.
        Does not save the returned object, you'll usually want to do that straight away.
        :return:
        """
        if rectification_mode is None:
            raise ValueError("Cannot search for DSO without rectification mode")
        if rectification_intrinsics is None:
            raise ValueError("Cannot search for DSO without intrinsics")
        if rectification_mode is not RectificationMode.NONE and not check_resolution(rectification_intrinsics):
            # Check the resolution we're rectifying to. If it will be invalid, raise an exception
            raise ValueError(f"Cannot {rectification_mode.name} to resolution "
                             f"{rectification_intrinsics.width}x{rectification_intrinsics.height}, it is invalid")
        # Look for existing objects with the same settings
        query = {
            'rectification_mode': rectification_mode.name,
            'rectification_intrinsics.width': rectification_intrinsics.width,
            'rectification_intrinsics.height': rectification_intrinsics.height
        }
        if rectification_mode is RectificationMode.CALIB:
            # When using CALIB rectification, the other intrinsics matter
            query['rectification_intrinsics.fx'] = rectification_intrinsics.fx
            query['rectification_intrinsics.fy'] = rectification_intrinsics.fy
            query['rectification_intrinsics.cx'] = rectification_intrinsics.cx
            query['rectification_intrinsics.cy'] = rectification_intrinsics.cy
        all_objects = DSO.objects.raw(query)
        if all_objects.count() > 0:
            return all_objects.first()
        # There isn't an existing system with those settings, make a new one.
        obj = cls(
            rectification_mode=rectification_mode,
            rectification_intrinsics=rectification_intrinsics
        )
        return obj
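
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the class above): the intended
# trial protocol for DSO, assuming `image_source` is an ImageCollection like
# the one defined later in this module set. The CameraIntrinsics constructor
# arguments are an assumption based on how the fields are used above.
def _example_run_dso(image_source):
    system = DSO.get_instance(
        rectification_mode=RectificationMode.CALIB,
        rectification_intrinsics=CameraIntrinsics(
            width=640, height=480, fx=320.0, fy=320.0, cx=320.0, cy=240.0)
    )
    system.save()  # get_instance does not save the object
    system.set_camera_intrinsics(image_source.camera_intrinsics, image_source.average_timestep)
    system.start_trial(image_source.sequence_type)
    for timestamp, image in image_source:
        system.process_image(image, timestamp)
    return system.finish_trial()
# ---------------------------------------------------------------------------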
class FrameError(pymodm.MongoModel):
    """
    All the errors from a single frame
    One of these gets created for each frame for each trial
    """
    trial_result = fields.ReferenceField(TrialResult, required=True)
    image = fields.ReferenceField(Image, required=True, on_delete=fields.ReferenceField.CASCADE)
    repeat = fields.IntegerField(required=True)
    timestamp = fields.FloatField(required=True)
    motion = TransformField(required=True)
    processing_time = fields.FloatField(default=np.nan)
    loop_distances = fields.ListField(fields.FloatField(), blank=True)
    loop_angles = fields.ListField(fields.FloatField(), blank=True)
    num_features = fields.IntegerField(default=0)
    num_matches = fields.IntegerField(default=0)

    tracking = EnumField(TrackingState, default=TrackingState.OK)
    absolute_error = fields.EmbeddedDocumentField(PoseError, blank=True)
    relative_error = fields.EmbeddedDocumentField(PoseError, blank=True)
    noise = fields.EmbeddedDocumentField(PoseError, blank=True)
    systemic_error = fields.EmbeddedDocumentField(PoseError, blank=True)

    system_properties = fields.DictField(blank=True)
    image_properties = fields.DictField(blank=True)

    columns = ColumnList(
        repeat=attrgetter('repeat'),
        timestamp=attrgetter('timestamp'),
        tracking=attrgetter('is_tracking'),
        processing_time=attrgetter('processing_time'),
        motion_x=attrgetter('motion.x'),
        motion_y=attrgetter('motion.y'),
        motion_z=attrgetter('motion.z'),
        motion_length=lambda obj: np.linalg.norm(obj.motion.location),
        motion_roll=lambda obj: obj.motion.euler[0],
        motion_pitch=lambda obj: obj.motion.euler[1],
        motion_yaw=lambda obj: obj.motion.euler[2],
        motion_rotation=lambda obj: tf.quat_angle(obj.motion.rotation_quat(True)),
        num_features=attrgetter('num_features'),
        num_matches=attrgetter('num_matches'),

        is_loop_closure=lambda obj: len(obj.loop_distances) > 0,
        num_loop_closures=lambda obj: len(obj.loop_distances),
        max_loop_closure_distance=lambda obj: np.max(obj.loop_distances) if len(obj.loop_distances) > 0 else np.nan,
        min_loop_closure_distance=lambda obj: np.min(obj.loop_distances) if len(obj.loop_distances) > 0 else np.nan,
        mean_loop_closure_distance=lambda obj: np.mean(obj.loop_distances) if len(obj.loop_distances) > 0 else np.nan,
        max_loop_closure_angle=lambda obj: np.max(obj.loop_angles) if len(obj.loop_angles) > 0 else np.nan,
        min_loop_closure_angle=lambda obj: np.min(obj.loop_angles) if len(obj.loop_angles) > 0 else np.nan,
        mean_loop_closure_angle=lambda obj: np.mean(obj.loop_angles) if len(obj.loop_angles) > 0 else np.nan,

        abs_error_x=lambda obj: obj.absolute_error.x if obj.absolute_error is not None else np.nan,
        abs_error_y=lambda obj: obj.absolute_error.y if obj.absolute_error is not None else np.nan,
        abs_error_z=lambda obj: obj.absolute_error.z if obj.absolute_error is not None else np.nan,
        abs_error_length=lambda obj: obj.absolute_error.length if obj.absolute_error is not None else np.nan,
        abs_error_direction=lambda obj: obj.absolute_error.direction if obj.absolute_error is not None else np.nan,
        abs_rot_error=lambda obj: obj.absolute_error.rot if obj.absolute_error is not None else np.nan,

        trans_error_x=lambda obj: obj.relative_error.x if obj.relative_error is not None else np.nan,
        trans_error_y=lambda obj: obj.relative_error.y if obj.relative_error is not None else np.nan,
        trans_error_z=lambda obj: obj.relative_error.z if obj.relative_error is not None else np.nan,
        trans_error_length=lambda obj: obj.relative_error.length if obj.relative_error is not None else np.nan,
        trans_error_direction=lambda obj: obj.relative_error.direction if obj.relative_error is not None else np.nan,
        rot_error=lambda obj: obj.relative_error.rot if obj.relative_error is not None else np.nan,

        trans_noise_x=lambda obj: obj.noise.x if obj.noise is not None else np.nan,
        trans_noise_y=lambda obj: obj.noise.y if obj.noise is not None else np.nan,
        trans_noise_z=lambda obj: obj.noise.z if obj.noise is not None else np.nan,
        trans_noise_length=lambda obj: obj.noise.length if obj.noise is not None else np.nan,
        trans_noise_direction=lambda obj: obj.noise.direction if obj.noise is not None else np.nan,
        rot_noise=lambda obj: obj.noise.rot if obj.noise is not None else np.nan,

        systemic_x=lambda obj: obj.systemic_error.x if obj.systemic_error is not None else np.nan,
        systemic_y=lambda obj: obj.systemic_error.y if obj.systemic_error is not None else np.nan,
        systemic_z=lambda obj: obj.systemic_error.z if obj.systemic_error is not None else np.nan,
        systemic_length=lambda obj: obj.systemic_error.length if obj.systemic_error is not None else np.nan,
        systemic_direction=lambda obj: obj.systemic_error.direction if obj.systemic_error is not None else np.nan,
        systemic_rot=lambda obj: obj.systemic_error.rot if obj.systemic_error is not None else np.nan,
    )

    # For each of the columns listed above, get the properties necessary to retrieve that column.
    # This lets us exclude all the other fields, and reduce query size
    required_fields = dict(
        repeat=('repeat',),
        timestamp=('timestamp',),
        tracking=('tracking',),  # the is_tracking property reads the tracking field
        processing_time=('processing_time',),
        motion_x=('motion',),
        motion_y=('motion',),
        motion_z=('motion',),
        motion_length=('motion',),
        motion_roll=('motion',),
        motion_pitch=('motion',),
        motion_yaw=('motion',),
        motion_rotation=('motion',),
        num_features=('num_features',),
        num_matches=('num_matches',),
        is_loop_closure=('loop_distances',),
        num_loop_closures=('loop_distances',),
        max_loop_closure_distance=('loop_distances',),
        min_loop_closure_distance=('loop_distances',),
        mean_loop_closure_distance=('loop_distances',),
        max_loop_closure_angle=('loop_angles',),
        min_loop_closure_angle=('loop_angles',),
        mean_loop_closure_angle=('loop_angles',),
        abs_error_x=('absolute_error',),
        abs_error_y=('absolute_error',),
        abs_error_z=('absolute_error',),
        abs_error_length=('absolute_error',),
        abs_error_direction=('absolute_error',),
        abs_rot_error=('absolute_error',),
        trans_error_x=('relative_error',),
        trans_error_y=('relative_error',),
        trans_error_z=('relative_error',),
        trans_error_length=('relative_error',),
        trans_error_direction=('relative_error',),
        rot_error=('relative_error',),
        trans_noise_x=('noise',),
        trans_noise_y=('noise',),
        trans_noise_z=('noise',),
        trans_noise_length=('noise',),
        trans_noise_direction=('noise',),
        rot_noise=('noise',),
        systemic_x=('systemic_error',),
        systemic_y=('systemic_error',),
        systemic_z=('systemic_error',),
        systemic_length=('systemic_error',),
        systemic_direction=('systemic_error',),
        systemic_rot=('systemic_error',),
    )

    @property
    def is_tracking(self) -> bool:
        return self.tracking is TrackingState.OK

    def get_columns(self) -> typing.Set[str]:
        """
        Get the columns available to this frame error result
        :return:
        """
        return set(self.columns.keys()) | set(self.system_properties.keys()) | set(self.image_properties.keys())

    def get_properties(self, columns: typing.Iterable[str] = None, other_properties: dict = None):
        """
        Flatten the frame error to a dictionary.
        This is used to construct rows in a Pandas data frame, so the keys are column names.
        Handles pulling data from the linked system and linked image.
        :return:
        """
        if other_properties is None:
            other_properties = {}
        if columns is None:
            columns = set(self.columns.keys()) | set(self.system_properties.keys()) | \
                      set(self.image_properties.keys())
        error_properties = {
            column_name: self.columns.get_value(self, column_name)
            for column_name in columns
            if column_name in self.columns
        }
        image_properties = {
            column: self.image_properties[column]
            for column in columns
            if column in self.image_properties
        }
        system_properties = {
            column: self.system_properties[column]
            for column in columns
            if column in self.system_properties
        }
        return {
            **other_properties,
            **image_properties,
            **system_properties,
            **error_properties
        }

    @classmethod
    def load_minimal_for_columns(
            cls,
            error_ids: typing.Iterable[bson.ObjectId],
            columns: typing.Iterable[str] = None
    ) -> typing.List['FrameError']:
        """
        Given a set of FrameError ids, load the FrameError objects.
        If we have a set of columns as well (we will), load only partial objects that have
        just enough data to compute the requested columns.
        Completes the loading in this method with the list call -- does not return a queryset.
        :param error_ids: The list of error ids to load
        :param columns: The columns, from which we derive the set of FrameError properties to load
        :return: A list of FrameError objects
        """
        queryset = cls.objects.raw({'_id': {'$in': list(error_ids)}})
        if columns is not None:
            # Limit to only the fields necessary to compute the columns we're interested in
            columns = set(columns)
            fields_to_include = {
                field
                for column in columns
                if column in cls.required_fields
                for field in cls.required_fields[column]
            }
            # If the column is not in image_properties or system_properties, it will not be included
            fields_to_include.update(
                template.format(column)
                for template in ('image_properties.{0}', 'system_properties.{0}')
                for column in columns - set(cls.required_fields.keys())
            )
            if len(fields_to_include) > 0:
                queryset = queryset.only(*fields_to_include)
        # Run the query
        return list(queryset)
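
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): loading partial FrameError objects for a
# handful of columns and flattening them into DataFrame rows. Assumes
# `error_ids` is a list of bson.ObjectIds gathered elsewhere; pandas is an
# assumed extra import for this sketch.
def _example_frame_errors_to_rows(error_ids):
    import pandas as pd
    columns = ['repeat', 'timestamp', 'tracking', 'motion_length', 'abs_error_length']
    # Only the 'repeat', 'timestamp', 'tracking', 'motion', and 'absolute_error'
    # fields will actually be loaded, per required_fields above.
    errors = FrameError.load_minimal_for_columns(error_ids, columns)
    return pd.DataFrame([error.get_properties(columns) for error in errors])
# ---------------------------------------------------------------------------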
class ImageCollection(arvet.core.image_source.ImageSource, pymodm.MongoModel):
    """
    A collection of images stored in the database.
    This can be a sequential set of images like a video, or a random sampling of different pictures.
    """
    images = pymodm.fields.ListField(
        pymodm.ReferenceField(Image, required=True, on_delete=pymodm.fields.ReferenceField.CASCADE),
        required=True
    )
    timestamps = pymodm.fields.ListField(pymodm.fields.FloatField(required=True), required=True)
    sequence_type = EnumField(ImageSequenceType, required=True)
    image_group = pymodm.fields.CharField(required=True)

    is_depth_available = pymodm.fields.BooleanField(required=True)
    is_normals_available = pymodm.fields.BooleanField(required=True)
    is_stereo_available = pymodm.fields.BooleanField(required=True)
    is_labels_available = pymodm.fields.BooleanField(required=True)
    is_masks_available = pymodm.fields.BooleanField(required=True)
    is_stored_in_database = True

    camera_intrinsics = pymodm.fields.EmbeddedDocumentField(cam_intr.CameraIntrinsics, required=True)
    stereo_offset = TransformField()
    right_camera_intrinsics = pymodm.fields.EmbeddedDocumentField(cam_intr.CameraIntrinsics)

    # Extra properties for identifying the sequence and the trajectory
    dataset = pymodm.fields.CharField()        # The name of the dataset. Should be unique when combined with sequence
    sequence_name = pymodm.fields.CharField()  # The name of the sequence within the dataset.
    trajectory_id = pymodm.fields.CharField()  # A unique name for the trajectory, so we can associate results by traj

    # List of available columns, and a getter for retrieving the value of each
    columns = ColumnList(
        dataset=attrgetter('dataset'),
        sequence_name=attrgetter('sequence_name'),
        trajectory_id=attrgetter('trajectory_id')
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # If the timestamps aren't sorted, re-sort them
        if not all(self.timestamps[idx] >= self.timestamps[idx - 1] for idx in range(1, len(self.timestamps))):
            pairs = sorted(zip(self.timestamps, self.images), key=itemgetter(0))
            self.images = [pair[1] for pair in pairs]
            self.timestamps = [pair[0] for pair in pairs]
        # Infer missing properties from the images
        if len(self.images) > 0:
            if self.image_group is None or len(self.image_group) <= 0:
                self.image_group = self.images[0].image_group
            if self.is_depth_available is None:
                self.is_depth_available = all(image.depth is not None for image in self.images)
            if self.is_normals_available is None:
                self.is_normals_available = all(image.normals is not None for image in self.images)
            if self.is_stereo_available is None:
                self.is_stereo_available = all(isinstance(image, StereoImage) for image in self.images)
            if self.is_labels_available is None:
                self.is_labels_available = any(len(image.metadata.labelled_objects) > 0 for image in self.images)
            if self.is_masks_available is None:
                self.is_masks_available = all(
                    any(isinstance(label, MaskedObject) for label in image.metadata.labelled_objects)
                    for image in self.images)
            if self.camera_intrinsics is None:
                self.camera_intrinsics = self.images[0].metadata.intrinsics
            if isinstance(self.images[0], StereoImage):
                if self.stereo_offset is None:
                    self.stereo_offset = self.images[0].stereo_offset
                if self.right_camera_intrinsics is None:
                    self.right_camera_intrinsics = self.images[0].right_metadata.intrinsics
        # Default value for trajectory id from the dataset and sequence name
        if self.trajectory_id is None and self.dataset is not None and self.sequence_name is not None:
            self.trajectory_id = self.dataset + ":" + self.sequence_name

    def __len__(self):
        """
        The length of the image collection
        :return:
        """
        return len(self.images)

    def __iter__(self):
        """
        Iterator for the image collection.
        Returns a timestamp and image for each iteration
        :return:
        """
        for timestamp, image in sorted(zip(self.timestamps, self.images), key=itemgetter(0)):
            yield timestamp, image

    def __getitem__(self, item):
        """
        Allow index-based access. Why not.
        This is the same as get
        :param item:
        :return:
        """
        return self.timestamps[item], self.images[item]

    @property
    def average_timestep(self) -> float:
        """
        Get the average time interval between frames.
        :return: The total time divided by 1 less than the number of frames (the number of intervals)
        """
        return (max(self.timestamps) - min(self.timestamps)) / (len(self.timestamps) - 1)

    def get_image_group(self) -> str:
        """
        If the image source is stored in the database, get the image group it is stored under.
        This lets us pre-load images from that group
        :return: The image_group property
        """
        return self.image_group

    def get_columns(self) -> typing.Set[str]:
        """
        Get the set of available properties for this system. Pass these to "get_properties", below.
        :return:
        """
        return set(self.columns.keys())

    def get_properties(self, columns: typing.Iterable[str] = None) -> typing.Mapping[str, typing.Any]:
        """
        Get the values of the requested properties
        :param columns:
        :return:
        """
        if columns is None:
            columns = self.columns.keys()
        return {
            col_name: self.columns.get_value(self, col_name)
            for col_name in columns
            if col_name in self.columns
        }

    @classmethod
    def load_minimal(cls, object_id: bson.ObjectId) -> 'ImageCollection':
        """
        Load an image collection without actual references to the images or the timestamps.
        This allows us to check if it is appropriate for certain systems, without using as much memory
        :param object_id: The id of the object to load
        :return:
        """
        return cls.objects.only(
            'sequence_type',
            'image_group',
            'is_depth_available',
            'is_normals_available',
            'is_stereo_available',
            'is_labels_available',
            'is_masks_available',
            'camera_intrinsics'
        ).get({'_id': object_id})
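
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): load_minimal is enough for the
# is_image_source_appropriate checks used by the systems in this codebase, so
# the full (heavy) collection is only fetched once a system accepts it.
# Assumes `system` is any VisionSystem and `collection_id` a saved collection.
def _example_filter_then_load(collection_id, system):
    stub = ImageCollection.load_minimal(collection_id)
    if not system.is_image_source_appropriate(stub):
        return None
    # Only now pay the cost of loading the image references and timestamps
    return ImageCollection.objects.get({'_id': collection_id})
# ---------------------------------------------------------------------------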
class LibVisOMonoSystem(LibVisOSystem):
    """
    Class to run LibVisO2 as a vision system in monocular mode.
    """
    height = fields.FloatField(default=1.0)
    pitch = fields.FloatField(default=0.0)
    ransac_iters = fields.IntegerField(default=2000)
    inlier_threshold = fields.FloatField(default=0.00001)
    motion_threshold = fields.FloatField(default=100.0)

    # List of available metadata columns, and getters for each
    columns = ColumnList(
        LibVisOSystem.columns,
        height=attrgetter('height'),
        pitch=attrgetter('pitch'),
        ransac_iters=attrgetter('ransac_iters'),
        inlier_threshold=attrgetter('inlier_threshold'),
        motion_threshold=attrgetter('motion_threshold')
    )

    @property
    def has_scale(self):
        return False

    def make_viso_instance(self):
        """
        Make a monocular libviso instance
        :return:
        """
        params = Mono_parameters()
        logging.getLogger(__name__).debug("    Created parameters object, populating ...")

        # Matcher parameters
        params.match.nms_n = self.matcher_nms_n
        params.match.nms_tau = self.matcher_nms_tau
        params.match.match_binsize = self.matcher_match_binsize
        params.match.match_radius = self.matcher_match_radius
        params.match.match_disp_tolerance = self.matcher_match_disp_tolerance
        params.match.outlier_disp_tolerance = self.matcher_outlier_disp_tolerance
        params.match.outlier_flow_tolerance = self.matcher_outlier_flow_tolerance
        params.match.multi_stage = 1 if self.matcher_multi_stage else 0
        params.match.half_resolution = 1 if self.matcher_half_resolution else 0
        params.match.refinement = self.matcher_refinement.value
        logging.getLogger(__name__).debug("    Added matcher parameters ...")

        # Feature bucketing
        params.bucket.max_features = self.bucketing_max_features
        params.bucket.bucket_width = self.bucketing_bucket_width
        params.bucket.bucket_height = self.bucketing_bucket_height
        logging.getLogger(__name__).debug("    Added bucket parameters ...")

        # Monocular-specific parameters
        params.height = self.height
        params.pitch = self.pitch
        params.ransac_iters = self.ransac_iters
        params.inlier_threshold = self.inlier_threshold
        params.motion_threshold = self.motion_threshold
        logging.getLogger(__name__).debug("    Added monocular specific parameters ...")

        # Camera calibration
        params.calib.f = self._focal_distance
        params.calib.cu = self._cu
        params.calib.cv = self._cv
        logging.getLogger(__name__).debug("    Parameters built, creating viso object ...")

        return VisualOdometryMono(params)

    def handle_process_image(self, viso, image: Image, timestamp: float) -> \
            typing.Tuple[bool, typing.Union[tf.Transform, None]]:
        """
        Send a frame to LibViso2, and get back the estimated motion
        :param viso: The visual odometry object. Will be a monocular object.
        :param image: The image object. A single greyscale frame is used in monocular mode.
        :param timestamp: The timestamp
        :return: True and a transform if the estimate is successful, False and None otherwise
        """
        image_greyscale = prepare_image(image.pixels)
        logging.getLogger(__name__).debug("    prepared images ...")

        success = viso.process_frame(image_greyscale)
        logging.getLogger(__name__).debug("    processed frame ...")

        if success:
            motion = viso.getMotion()  # Motion is a 4x4 pose matrix
            np_motion = np.zeros((4, 4))
            motion.toNumpy(np_motion)
            np_motion = np.linalg.inv(np_motion)  # Invert the motion to make it new frame relative to old
            # This is the pose of the previous frame relative to the next one
            return True, make_relative_pose(np_motion)
        return False, None

    @classmethod
    def get_instance(
            cls,
            matcher_nms_n: int = 3,
            matcher_nms_tau: int = 50,
            matcher_match_binsize: int = 50,
            matcher_match_radius: int = 200,
            matcher_match_disp_tolerance: int = 2,
            matcher_outlier_disp_tolerance: int = 5,
            matcher_outlier_flow_tolerance: int = 5,
            matcher_multi_stage: bool = True,
            matcher_half_resolution: bool = True,
            matcher_refinement: MatcherRefinement = MatcherRefinement.PIXEL,
            bucketing_max_features: int = 2,
            bucketing_bucket_width: int = 50,
            bucketing_bucket_height: int = 50,
            height: float = 1.0,
            pitch: float = 0.0,
            ransac_iters: int = 2000,
            inlier_threshold: float = 0.00001,
            motion_threshold: float = 100.0) -> 'LibVisOMonoSystem':
        """
        Get an instance of this vision system, with some parameters, pulling from the database if possible,
        or construct a new one if needed.
        It is the responsibility of subclasses to ensure that as few instances of each system as possible
        exist within the database.
        Does not save the returned object, you'll usually want to do that straight away.
        :return:
        """
        # Look for existing objects with the same settings
        all_objects = LibVisOMonoSystem.objects.raw({
            'matcher_nms_n': int(matcher_nms_n),
            'matcher_nms_tau': int(matcher_nms_tau),
            'matcher_match_binsize': int(matcher_match_binsize),
            'matcher_match_radius': int(matcher_match_radius),
            'matcher_match_disp_tolerance': int(matcher_match_disp_tolerance),
            'matcher_outlier_disp_tolerance': int(matcher_outlier_disp_tolerance),
            'matcher_outlier_flow_tolerance': int(matcher_outlier_flow_tolerance),
            'matcher_multi_stage': bool(matcher_multi_stage),
            'matcher_half_resolution': bool(matcher_half_resolution),
            'matcher_refinement': matcher_refinement.name,
            'bucketing_max_features': int(bucketing_max_features),
            'bucketing_bucket_width': int(bucketing_bucket_width),
            'bucketing_bucket_height': int(bucketing_bucket_height),
            'height': float(height),
            'pitch': float(pitch),
            'ransac_iters': int(ransac_iters),
            'inlier_threshold': float(inlier_threshold),
            'motion_threshold': float(motion_threshold)
        })
        if all_objects.count() > 0:
            return all_objects.first()
        # There isn't an existing system with those settings, make a new one.
        obj = cls(
            matcher_nms_n=int(matcher_nms_n),
            matcher_nms_tau=int(matcher_nms_tau),
            matcher_match_binsize=int(matcher_match_binsize),
            matcher_match_radius=int(matcher_match_radius),
            matcher_match_disp_tolerance=int(matcher_match_disp_tolerance),
            matcher_outlier_disp_tolerance=int(matcher_outlier_disp_tolerance),
            matcher_outlier_flow_tolerance=int(matcher_outlier_flow_tolerance),
            matcher_multi_stage=bool(matcher_multi_stage),
            matcher_half_resolution=bool(matcher_half_resolution),
            matcher_refinement=matcher_refinement,
            bucketing_max_features=int(bucketing_max_features),
            bucketing_bucket_width=int(bucketing_bucket_width),
            bucketing_bucket_height=int(bucketing_bucket_height),
            height=float(height),
            pitch=float(pitch),
            ransac_iters=int(ransac_iters),
            inlier_threshold=float(inlier_threshold),
            motion_threshold=float(motion_threshold))
        return obj
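
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): monocular instances should come from
# get_instance so that equivalent configurations map to a single database
# record. The values shown are the documented defaults; height and pitch feed
# libviso2's ground-plane scale estimation.
def _example_get_mono_system():
    system = LibVisOMonoSystem.get_instance(
        height=1.0,   # camera height above the ground plane
        pitch=0.0,    # camera pitch relative to the ground plane
        ransac_iters=2000,
        inlier_threshold=0.00001,
        motion_threshold=100.0)
    system.save()  # get_instance does not save the object
    return system
# ---------------------------------------------------------------------------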
class LibVisOSystem(VisionSystem, metaclass=ABCModelMeta):
    """
    Class to run LibVisO2 as a vision system.
    A generic base class, the specific types are below (LibVisOStereoSystem, LibVisOMonoSystem)
    """
    matcher_nms_n = fields.IntegerField(default=3)
    matcher_nms_tau = fields.IntegerField(default=50)
    matcher_match_binsize = fields.IntegerField(default=50)
    matcher_match_radius = fields.IntegerField(default=200)
    matcher_match_disp_tolerance = fields.IntegerField(default=2)
    matcher_outlier_disp_tolerance = fields.IntegerField(default=5)
    matcher_outlier_flow_tolerance = fields.IntegerField(default=5)
    matcher_multi_stage = fields.BooleanField(default=True)
    matcher_half_resolution = fields.BooleanField(default=True)
    matcher_refinement = EnumField(MatcherRefinement, default=MatcherRefinement.PIXEL)
    bucketing_max_features = fields.IntegerField(default=2)
    bucketing_bucket_width = fields.IntegerField(default=50)
    bucketing_bucket_height = fields.IntegerField(default=50)

    # List of available metadata columns, and getters for each
    columns = ColumnList(
        seed=None,
        in_height=None,
        in_width=None,
        in_fx=None,
        in_fy=None,
        in_cx=None,
        in_cy=None,
        matcher_nms_n=attrgetter('matcher_nms_n'),
        matcher_nms_tau=attrgetter('matcher_nms_tau'),
        matcher_match_binsize=attrgetter('matcher_match_binsize'),
        matcher_match_radius=attrgetter('matcher_match_radius'),
        matcher_match_disp_tolerance=attrgetter('matcher_match_disp_tolerance'),
        matcher_outlier_disp_tolerance=attrgetter('matcher_outlier_disp_tolerance'),
        matcher_outlier_flow_tolerance=attrgetter('matcher_outlier_flow_tolerance'),
        matcher_multi_stage=attrgetter('matcher_multi_stage'),
        matcher_half_resolution=attrgetter('matcher_half_resolution'),
        matcher_refinement=attrgetter('matcher_refinement'),
        bucketing_max_features=attrgetter('bucketing_max_features'),
        bucketing_bucket_width=attrgetter('bucketing_bucket_width'),
        bucketing_bucket_height=attrgetter('bucketing_bucket_height'))

    def __init__(self, *args, **kwargs):
        super(LibVisOSystem, self).__init__(*args, **kwargs)
        # These will get overridden by set_camera_intrinsics
        self._focal_distance = 1.0
        self._cu = 320
        self._cv = 240
        self._width = 0     # These are not actually used, only stored
        self._height = 0

        # Ongoing state during a trial that is initialised in start_trial
        self._viso = None
        self._seed = None
        self._start_time = None
        self._has_chosen_origin = False
        self._frame_results = []

    @classmethod
    def is_deterministic(cls) -> StochasticBehaviour:
        """
        LibVisO2 is controlled with a seed
        :return: StochasticBehaviour.SEEDED
        """
        return StochasticBehaviour.SEEDED

    def is_image_source_appropriate(self, image_source: ImageSource) -> bool:
        return image_source.sequence_type == ImageSequenceType.SEQUENTIAL

    def set_camera_intrinsics(self, camera_intrinsics: CameraIntrinsics, average_timestep: float) -> None:
        """
        Set the camera intrinsics for libviso2
        :param camera_intrinsics: The camera intrinsics, relative to the image resolution
        :param average_timestep: The average time between frames. Not relevant to libviso.
        :return:
        """
        logging.getLogger(__name__).debug("Setting camera intrinsics")
        self._focal_distance = float(camera_intrinsics.fx)
        self._cu = float(camera_intrinsics.cx)
        self._cv = float(camera_intrinsics.cy)
        self._width = float(camera_intrinsics.width)
        self._height = float(camera_intrinsics.height)

    def start_trial(self, sequence_type: ImageSequenceType, seed: int = 0) -> None:
        logging.getLogger(__name__).debug("Starting LibVisO trial...")
        self._start_time = time.time()
        if sequence_type != ImageSequenceType.SEQUENTIAL:
            return

        self._viso = self.make_viso_instance()
        self._seed = seed
        self._viso.seed(seed)
        self._has_chosen_origin = False
        self._frame_results = []
        logging.getLogger(__name__).debug("    Started LibVisO trial.")

    def process_image(self, image: Image, timestamp: float) -> None:
        start_time = time.time()
        logging.getLogger(__name__).debug("Processing image at time {0} ...".format(timestamp))

        # This is the pose of the previous frame relative to the next one
        tracking, estimated_motion = self.handle_process_image(self._viso, image, timestamp)
        logging.getLogger(__name__).debug("    got estimated motion ...")
        end_time = time.time()

        frame_result = FrameResult(
            timestamp=timestamp,
            image=image.pk,
            processing_time=end_time - start_time,
            pose=image.camera_pose,
            tracking_state=TrackingState.OK if tracking
            else TrackingState.LOST if self._has_chosen_origin
            else TrackingState.NOT_INITIALIZED,
            estimated_motion=estimated_motion,
            num_matches=self._viso.getNumberOfMatches())
        if tracking and not self._has_chosen_origin:
            # set the initial pose estimate to 0, so we can infer the later ones from the motions
            self._has_chosen_origin = True
            frame_result.estimated_pose = tf.Transform()
            frame_result.estimated_motion = None  # This will always be the identity on the first valid frame
        self._frame_results.append(frame_result)
        logging.getLogger(__name__).debug("    Processing done.")

    def finish_trial(self) -> SLAMTrialResult:
        logging.getLogger(__name__).debug("Finishing LibVisO trial ...")
        result = SLAMTrialResult(
            system=self,
            success=True,
            settings=self.get_settings(),
            results=self._frame_results,
            has_scale=self.has_scale)
        self._frame_results = None
        self._viso = None
        result.run_time = time.time() - self._start_time
        self._start_time = None
        logging.getLogger(__name__).debug("    Created result")
        return result

    def get_columns(self) -> typing.Set[str]:
        """
        Get the set of available properties for this system. Pass these to "get_properties", below.
        :return:
        """
        return set(self.columns.keys())

    def get_properties(
            self,
            columns: typing.Iterable[str] = None,
            settings: typing.Mapping[str, typing.Any] = None
    ) -> typing.Mapping[str, typing.Any]:
        """
        Get the values of the requested properties
        :param columns:
        :param settings:
        :return:
        """
        if columns is None:
            columns = self.columns.keys()
        if settings is None:
            settings = {}
        return {
            col_name: settings[col_name] if col_name in settings else self.columns.get_value(self, col_name)
            for col_name in columns
            if col_name in self.columns
        }

    @abc.abstractmethod
    def make_viso_instance(self):
        """
        Make the viso object. Stereo mode will make a stereo object, monocular a monocular object
        :return:
        """
        pass

    @abc.abstractmethod
    def handle_process_image(self, viso, image: Image, timestamp: float) -> \
            typing.Tuple[bool, typing.Union[tf.Transform, None]]:
        """
        Send the image to the viso object.
        In stereo mode, we need to send left and right frames, in monocular only one frame.
        :param viso: The viso object, created by 'make_viso_instance'
        :param image: The image object
        :param timestamp: The timestamp for this frame
        :return: True and a transform if the estimate is successful, False and None otherwise
        """
        pass

    @property
    @abc.abstractmethod
    def has_scale(self):
        pass

    def get_settings(self):
        return {
            'seed': self._seed,
            'in_fx': self._focal_distance,
            'in_fy': self._focal_distance,
            'in_cx': self._cu,
            'in_cy': self._cv,
            'in_height': self._height,
            'in_width': self._width
        }
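
# ---------------------------------------------------------------------------
# Illustrative sketch of the subclass contract above: a concrete system only
# provides make_viso_instance, handle_process_image, and has_scale; the base
# class owns the trial loop and result accumulation. `FakeOdometry` is a
# hypothetical stand-in, not a real libviso binding.
class _ExampleVisoSystem(LibVisOSystem):
    @property
    def has_scale(self):
        return False

    def make_viso_instance(self):
        return FakeOdometry()  # hypothetical odometry object, for illustration

    def handle_process_image(self, viso, image, timestamp):
        success = viso.process_frame(prepare_image(image.pixels))
        if success:
            # Identity motion, purely for illustration
            return True, make_relative_pose(np.identity(4))
        return False, None
# ---------------------------------------------------------------------------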
class LibVisOStereoSystem(LibVisOSystem):
    """
    Class to run LibVisO2 as a vision system in stereo mode.
    """
    ransac_iters = fields.IntegerField(default=200)
    inlier_threshold = fields.FloatField(default=2.0)
    reweighting = fields.BooleanField(default=True)

    # List of available metadata columns, and getters for each
    columns = ColumnList(
        LibVisOSystem.columns,
        base=None,
        ransac_iters=attrgetter('ransac_iters'),
        inlier_threshold=attrgetter('inlier_threshold'),
        reweighting=attrgetter('reweighting')
    )

    def __init__(self, *args, **kwargs):
        super(LibVisOStereoSystem, self).__init__(*args, **kwargs)
        # These will get overridden by set_stereo_offset
        self._base = 0.3

    @property
    def has_scale(self):
        return True

    def is_image_source_appropriate(self, image_source: ImageSource) -> bool:
        return (super(LibVisOStereoSystem, self).is_image_source_appropriate(image_source) and
                image_source.is_stereo_available)

    def set_stereo_offset(self, offset: tf.Transform) -> None:
        """
        Set the stereo baseline
        :param offset:
        :return:
        """
        baseline = -1 * offset.location[1]  # right is -Y axis
        logging.getLogger(__name__).debug("Setting stereo baseline to {0}".format(baseline))
        self._base = float(baseline)

    def make_viso_instance(self):
        """
        Construct a stereo libviso system
        :return:
        """
        params = Stereo_parameters()
        logging.getLogger(__name__).debug("    Created parameters object, populating ...")

        # Matcher parameters
        params.match.nms_n = self.matcher_nms_n
        params.match.nms_tau = self.matcher_nms_tau
        params.match.match_binsize = self.matcher_match_binsize
        params.match.match_radius = self.matcher_match_radius
        params.match.match_disp_tolerance = self.matcher_match_disp_tolerance
        params.match.outlier_disp_tolerance = self.matcher_outlier_disp_tolerance
        params.match.outlier_flow_tolerance = self.matcher_outlier_flow_tolerance
        params.match.multi_stage = 1 if self.matcher_multi_stage else 0
        params.match.half_resolution = 1 if self.matcher_half_resolution else 0
        params.match.refinement = self.matcher_refinement.value
        logging.getLogger(__name__).debug("    Added matcher parameters ...")

        # Feature bucketing
        params.bucket.max_features = self.bucketing_max_features
        params.bucket.bucket_width = self.bucketing_bucket_width
        params.bucket.bucket_height = self.bucketing_bucket_height
        logging.getLogger(__name__).debug("    Added bucket parameters ...")

        # Stereo-specific parameters
        params.ransac_iters = self.ransac_iters
        params.inlier_threshold = self.inlier_threshold
        params.reweighting = self.reweighting
        logging.getLogger(__name__).debug("    Added stereo specific parameters ...")

        # Camera calibration
        params.calib.f = self._focal_distance
        params.calib.cu = self._cu
        params.calib.cv = self._cv
        params.base = self._base
        logging.getLogger(__name__).debug("    Parameters built, creating viso object ...")

        return VisualOdometryStereo(params)

    def handle_process_image(self, viso, image: Image, timestamp: float) -> \
            typing.Tuple[bool, typing.Union[tf.Transform, None]]:
        """
        Send a frame to LibViso2, and get back the estimated motion
        :param viso: The visual odometry object. Will be a stereo object.
        :param image: The image object. Will be a stereo image
        :param timestamp: The timestamp
        :return: True and a transform if the estimate is successful, False and None otherwise
        """
        left_grey = prepare_image(image.left_pixels)
        right_grey = prepare_image(image.right_pixels)
        logging.getLogger(__name__).debug("    prepared images ...")

        success = viso.process_frame(left_grey, right_grey)
        logging.getLogger(__name__).debug("    processed frame ...")

        if success:
            motion = viso.getMotion()  # Motion is a 4x4 pose matrix
            np_motion = np.zeros((4, 4))
            motion.toNumpy(np_motion)
            np_motion = np.linalg.inv(np_motion)  # Invert the motion to make it new frame relative to old
            # This is the pose of the previous frame relative to the next one
            return True, make_relative_pose(np_motion)
        return False, None

    def get_settings(self):
        settings = super(LibVisOStereoSystem, self).get_settings()
        settings['base'] = self._base
        return settings

    def preload_image_data(self, image: Image) -> None:
        """
        Preload the pixel data we use from the images.
        This is a stereo system, load right pixel data as well
        :param image:
        :return:
        """
        super(LibVisOStereoSystem, self).preload_image_data(image)
        if hasattr(image, 'right_pixels'):
            _ = image.right_pixels

    @classmethod
    def get_instance(
            cls,
            matcher_nms_n: int = 3,
            matcher_nms_tau: int = 50,
            matcher_match_binsize: int = 50,
            matcher_match_radius: int = 200,
            matcher_match_disp_tolerance: int = 2,
            matcher_outlier_disp_tolerance: int = 5,
            matcher_outlier_flow_tolerance: int = 5,
            matcher_multi_stage: bool = True,
            matcher_half_resolution: bool = True,
            matcher_refinement: MatcherRefinement = MatcherRefinement.PIXEL,
            bucketing_max_features: int = 2,
            bucketing_bucket_width: int = 50,
            bucketing_bucket_height: int = 50,
            ransac_iters: int = 200,
            inlier_threshold: float = 2.0,
            reweighting: bool = True) -> 'LibVisOStereoSystem':
        """
        Get an instance of this vision system, with some parameters, pulling from the database if possible,
        or construct a new one if needed.
        It is the responsibility of subclasses to ensure that as few instances of each system as possible
        exist within the database.
        Does not save the returned object, you'll usually want to do that straight away.
        :return:
        """
        # Look for existing objects with the same settings
        all_objects = LibVisOStereoSystem.objects.raw({
            'matcher_nms_n': int(matcher_nms_n),
            'matcher_nms_tau': int(matcher_nms_tau),
            'matcher_match_binsize': int(matcher_match_binsize),
            'matcher_match_radius': int(matcher_match_radius),
            'matcher_match_disp_tolerance': int(matcher_match_disp_tolerance),
            'matcher_outlier_disp_tolerance': int(matcher_outlier_disp_tolerance),
            'matcher_outlier_flow_tolerance': int(matcher_outlier_flow_tolerance),
            'matcher_multi_stage': bool(matcher_multi_stage),
            'matcher_half_resolution': bool(matcher_half_resolution),
            'matcher_refinement': matcher_refinement.name,
            'bucketing_max_features': int(bucketing_max_features),
            'bucketing_bucket_width': int(bucketing_bucket_width),
            'bucketing_bucket_height': int(bucketing_bucket_height),
            'ransac_iters': int(ransac_iters),
            'inlier_threshold': float(inlier_threshold),
            'reweighting': bool(reweighting)
        })
        if all_objects.count() > 0:
            return all_objects.first()
        # There isn't an existing system with those settings, make a new one.
        obj = cls(
            matcher_nms_n=int(matcher_nms_n),
            matcher_nms_tau=int(matcher_nms_tau),
            matcher_match_binsize=int(matcher_match_binsize),
            matcher_match_radius=int(matcher_match_radius),
            matcher_match_disp_tolerance=int(matcher_match_disp_tolerance),
            matcher_outlier_disp_tolerance=int(matcher_outlier_disp_tolerance),
            matcher_outlier_flow_tolerance=int(matcher_outlier_flow_tolerance),
            matcher_multi_stage=bool(matcher_multi_stage),
            matcher_half_resolution=bool(matcher_half_resolution),
            matcher_refinement=matcher_refinement,
            bucketing_max_features=int(bucketing_max_features),
            bucketing_bucket_width=int(bucketing_bucket_width),
            bucketing_bucket_height=int(bucketing_bucket_height),
            ransac_iters=int(ransac_iters),
            inlier_threshold=float(inlier_threshold),
            reweighting=bool(reweighting))
        return obj
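
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): a stereo trial needs the stereo offset as
# well as the intrinsics before start_trial. Assumes `image_source` is a
# stereo-capable ImageCollection exposing `stereo_offset`.
def _example_run_stereo_trial(image_source):
    system = LibVisOStereoSystem.get_instance()  # all defaults
    system.set_camera_intrinsics(image_source.camera_intrinsics, image_source.average_timestep)
    system.set_stereo_offset(image_source.stereo_offset)
    system.start_trial(image_source.sequence_type, seed=1)
    for timestamp, image in image_source:
        system.preload_image_data(image)  # loads left and right pixels
        system.process_image(image, timestamp)
    return system.finish_trial()
# ---------------------------------------------------------------------------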
class OrbSlam2(VisionSystem): """ Python wrapper for ORB_SLAM2 """ vocabulary_file = fields.CharField(blank=True, default='') mode = EnumField(SensorMode, required=True) vocabulary_branching_factor = fields.IntegerField(required=True, default=10) vocabulary_depth = fields.IntegerField(required=True, default=6) vocabulary_seed = fields.IntegerField(required=True, default=0) depth_threshold = fields.FloatField(required=True, default=40.0) orb_num_features = fields.IntegerField(required=True, default=2000) orb_scale_factor = fields.FloatField(required=True, default=1.2) orb_num_levels = fields.IntegerField(required=True, default=8) orb_ini_threshold_fast = fields.IntegerField(required=True, default=12) orb_min_threshold_fast = fields.IntegerField(required=True, default=7) # List of available metadata columns, and getters for each columns = ColumnList( vocabulary_file=attrgetter('vocabulary_file'), mode=attrgetter('mode'), in_height=None, in_width=None, in_fx=None, in_fy=None, in_cx=None, in_cy=None, in_p1=None, in_p2=None, in_k1=None, in_k2=None, in_k3=None, base=None, vocabulary_branching_factor=attrgetter('vocabulary_branching_factor'), vocabulary_depth=attrgetter('vocabulary_depth'), vocabulary_seed=attrgetter('vocabulary_seed'), depth_threshold=attrgetter('depth_threshold'), orb_num_features=attrgetter('orb_num_features'), orb_scale_factor=attrgetter('orb_scale_factor'), orb_num_levels=attrgetter('orb_num_levels'), orb_ini_threshold_fast=attrgetter('orb_ini_threshold_fast'), orb_min_threshold_fast=attrgetter('orb_min_threshold_fast') ) def __init__(self, *args, **kwargs): super(OrbSlam2, self).__init__(*args, **kwargs) self._intrinsics = None self._framerate = 30 self._stereo_baseline = None self._expected_completion_timeout = 3600 # This is how long we wait after the dataset is finished self._temp_folder = None self._actual_vocab_file = None self._settings_file = None self._child_process = None self._input_queue = None self._output_queue = None self._start_time = None self._partial_frame_results = None @classmethod def is_deterministic(cls) -> StochasticBehaviour: """ ORB_SLAM2 is non-deterministic, it will always give different results. :return: StochasticBehaviour.NON_DETERMINISTIC """ return StochasticBehaviour.NON_DETERMINISTIC def is_image_source_appropriate(self, image_source: ImageSource) -> bool: """ Is the dataset appropriate for testing this vision system. This will depend on which sensor mode ORB_SLAM is configured in, stereo mode will require stereo to be available, while RGB-D mode will require depth to be available. Also checks the ORB feature pyramid settings against the :param image_source: The source for images that this system will potentially be run with. :return: True iff the particular dataset is appropriate for this vision system. :rtype: bool """ return (image_source.sequence_type == ImageSequenceType.SEQUENTIAL and ( self.mode == SensorMode.MONOCULAR or (self.mode == SensorMode.STEREO and image_source.is_stereo_available) or (self.mode == SensorMode.RGBD and image_source.is_depth_available) ) and check_feature_pyramid_settings( img_width=image_source.camera_intrinsics.width, img_height=image_source.camera_intrinsics.height, orb_scale_factor=self.orb_scale_factor, orb_num_levels=self.orb_num_levels )) def get_columns(self) -> typing.Set[str]: """ Get the set of available properties for this system. Pass these to "get_properties", below. 
:return: """ return set(self.columns.keys()) def get_properties(self, columns: typing.Iterable[str] = None, settings: typing.Mapping[str, typing.Any] = None) -> typing.Mapping[str, typing.Any]: """ Get the values of the requested properties :param columns: :param settings: :return: """ if columns is None: columns = self.columns.keys() if settings is None: settings = {} properties = { col_name: settings[col_name] if col_name in settings else self.columns.get_value(self, col_name) for col_name in columns if col_name in self.columns } if 'mode' in properties and not isinstance(properties['mode'], SensorMode): properties['mode'] = SensorMode[properties['mode']] return properties def set_camera_intrinsics(self, camera_intrinsics: CameraIntrinsics, average_timestep: float) -> None: """ Set the intrinsics of the camera using :param camera_intrinsics: A metadata.camera_intrinsics.CameraIntriniscs object :param average_timestep: The average time interval between frames. Used to configure ORB_SLAM2 :return: """ if self._child_process is None: self._intrinsics = camera_intrinsics self._framerate = 1 / average_timestep def set_stereo_offset(self, offset: tf.Transform) -> None: """ Set the stereo baseline configuration. :param offset: :return: """ # ORBSLAM expects cameras to be coplanar, only offset to the right (-Y) self._stereo_baseline = -1 * offset.location[1] def resolve_paths(self, path_manager: PathManager): """ Use the path manager to find the required files on disk. Will raise various exceptions if the file is not available. You MUST call this before calling start_trial (run_task will handle that). :param path_manager: The PathManager, for locating files. :return: """ if self.vocabulary_file is None or len(self.vocabulary_file) <= 0: raise ValueError("No vocabulary available for ORB-SLAM {0}, did you build one?".format(self.pk)) self._temp_folder = path_manager.get_temp_folder() self._actual_vocab_file = path_manager.find_file(self.vocabulary_file) def preload_image_data(self, image: Image) -> None: """ Preload the pixel data we use from the images. If the system is configured for stereo or RGBD operation, load those pixels as well. :param image: :return: """ super(OrbSlam2, self).preload_image_data(image) if self.mode is SensorMode.STEREO and hasattr(image, 'right_pixels'): # Load the right image if the system is configured for stereo _ = image.right_pixels elif self.mode is SensorMode.RGBD: # Load the depth image if the system is configured for RGB-D _ = image.depth def start_trial(self, sequence_type: ImageSequenceType, seed: int = 0) -> None: """ Start a trial with this system. After calling this, we can feed images to the system. When the trial is complete, call finish_trial to get the result. :param sequence_type: Are the provided images part of a sequence, or just unassociated pictures. :param seed: A random seed. Ignored. 
:return: void """ if sequence_type is not ImageSequenceType.SEQUENTIAL: return logging.getLogger(__name__).debug( "Starting ORBSLAM with the following settings:\n" " vocab path: '{0}'\n" " temp folder: '{1}'\n" " stereo baseline: {2}\n" " intrinsics: {3}\n" " framerate: {4}".format( self._actual_vocab_file, self._temp_folder, self._stereo_baseline, self._intrinsics, self._framerate )) self._start_time = time.time() self.save_settings() # we have to save the settings, so that orb-slam can load them self._partial_frame_results = {} self._input_queue = multiprocessing.Queue() self._output_queue = multiprocessing.Queue() self._child_process = multiprocessing.Process(target=run_orbslam, args=(self._output_queue, self._input_queue, str(self._actual_vocab_file), str(self._settings_file), self.mode)) self._child_process.daemon = True self._child_process.start() logging.getLogger(__name__).info("Started subprocess, waiting for ready signal...") try: started = self._output_queue.get(block=True, timeout=self._expected_completion_timeout) except queue.Empty: started = None if started is None: # Failed to start, clean up and then raise exception self._stop_subprocess(terminate=True) self.remove_settings() self._input_queue = None self._output_queue = None self._partial_frame_results = None raise RuntimeError("Failed to start ORBSLAM2, timed out after {0} seconds".format( self._expected_completion_timeout)) def process_image(self, image: Image, timestamp: float) -> None: """ Process an image as part of the current run. Should automatically start a new trial if none is currently started. :param image: The image object for this frame :param timestamp: A timestamp or index associated with this image. Sometimes None. :return: void """ if self._input_queue is not None: # Wait here, to throttle the input rate to the queue, and prevent it from growing too large # delay_time = 0 # while self._input_queue.qsize() > 30 and delay_time < 10: # time.sleep(1) # delay_time += 1 logging.getLogger(__name__).debug("Sending frame {0}...".format(len(self._partial_frame_results))) # Add the camera pose to the ground-truth trajectory self._partial_frame_results[timestamp] = FrameResult( timestamp=timestamp, image=image.pk, pose=image.camera_pose ) # Send different input based on the running mode if self.mode == SensorMode.MONOCULAR: self._input_queue.put((image_utils.convert_to_grey(image.pixels), None, timestamp)) elif self.mode == SensorMode.STEREO: self._input_queue.put((image_utils.convert_to_grey(image.left_pixels), image_utils.convert_to_grey(image.right_pixels), timestamp)) elif self.mode == SensorMode.RGBD: self._input_queue.put((image_utils.convert_to_grey(image.pixels), image.depth.astype(np.float32), timestamp)) def finish_trial(self) -> SLAMTrialResult: """ End the current trial, returning a trial result. Return none if no trial is started. 
    def finish_trial(self) -> SLAMTrialResult:
        """
        End the current trial, returning a trial result.
        Raises RuntimeError if the trial failed to start.
        :return:
        :rtype TrialResult:
        """
        if self._input_queue is None:
            raise RuntimeError("Cannot finish ORBSLAM trial, failed to start.")

        # This will end the main loop, see run_orbslam, below
        self._input_queue.put(None)

        # Get the results from the subprocess
        timeout = (len(self._partial_frame_results) + 1) * self._expected_completion_timeout / 10
        frame_statistics = None
        try:
            while frame_statistics is None or frame_statistics == _STARTED_MARKER:
                # Keep getting from the queue until we get something other than the 'started' marker
                frame_statistics = self._output_queue.get(block=True, timeout=timeout)
                if frame_statistics is not None:
                    logging.getLogger(__name__).debug(f"Got result: {frame_statistics}")
        except queue.Empty:
            frame_statistics = None

        # First, clean up (regardless of whether we got results)
        self._stop_subprocess(terminate=frame_statistics is None)
        self.remove_settings()
        self._input_queue = None
        self._output_queue = None

        # If we couldn't get the results, raise an exception
        if frame_statistics is None:
            raise RuntimeError("Failed to stop ORBSLAM2, timed out after {0} seconds".format(timeout))

        # Merge the frame statistics with the partial frame results
        timestamps = set(self._partial_frame_results.keys())
        if not all(timestamp in timestamps for timestamp in frame_statistics.keys()):
            if len(timestamps) <= 0:
                # No known timestamps, log them all
                logging.getLogger(__name__).warning(
                    "Subprocess returned estimates for times, but the parent process has received no images. "
                    f"(times were {sorted(frame_statistics.keys())})")
                frame_statistics = {}
            else:
                # Re-key the subprocess results onto the closest known timestamps
                timestamps_arr = np.array(list(timestamps))
                rekey_map = {
                    subprocess_stamp: find_closest(subprocess_stamp, timestamps_arr)
                    for subprocess_stamp in frame_statistics.keys()
                }
                frame_statistics = {
                    rekey_map[subprocess_stamp]: frame_statistics[subprocess_stamp]
                    for subprocess_stamp in frame_statistics.keys()
                    if rekey_map[subprocess_stamp] is not None
                }
                # Log the subprocess timestamps that do not appear in our known list
                unknown_keys = [
                    subprocess_stamp for subprocess_stamp in rekey_map.keys()
                    if rekey_map[subprocess_stamp] is None
                ]
                if len(unknown_keys) > 0:
                    logging.getLogger(__name__).warning(
                        "The following timestamps were returned by the subprocess, "
                        "but do not match any image timestamp known by the parent process: " + str(unknown_keys))

        # Merge the data from the subprocess with the partial frame results
        for timestamp, frame_stats in frame_statistics.items():
            frame_result = self._partial_frame_results[timestamp]
            if frame_stats[0] is not None:
                frame_result.processing_time = frame_stats[0]
                frame_result.num_features = frame_stats[1]
                frame_result.num_matches = frame_stats[2]
                frame_result.tracking_state = frame_stats[3]
                frame_result.loop_edges = list(frame_stats[5])
                if frame_stats[4] is not None:
                    estimated_pose = np.identity(4)
                    estimated_pose[0:3, :] = frame_stats[4]
                    frame_result.estimated_pose = make_relative_pose(estimated_pose)
                if not all(loop_timestamp in timestamps for loop_timestamp in frame_result.loop_edges):
                    logging.getLogger(__name__).warning(
                        f"Loop closures for {timestamp} didn't match a known timestamp")

        result = SLAMTrialResult(
            system=self.pk,
            success=len(self._partial_frame_results) > 0,
            results=[self._partial_frame_results[timestamp] for timestamp in sorted(timestamps)],
            has_scale=(self.mode != SensorMode.MONOCULAR),
            settings={
                'in_fx': self._intrinsics.fx,
                'in_fy': self._intrinsics.fy,
                'in_cx': self._intrinsics.cx,
                'in_cy': self._intrinsics.cy,
                'in_k1': self._intrinsics.k1,
                'in_k2': self._intrinsics.k2,
                'in_p1': self._intrinsics.p1,
                'in_p2': self._intrinsics.p2,
                'in_k3': self._intrinsics.k3,
                'in_width': self._intrinsics.width,
                'in_height': self._intrinsics.height,
                'base': self._stereo_baseline if self._stereo_baseline is not None else float('nan'),
                'vocabulary_file': str(self.vocabulary_file),
                'mode': str(self.mode.name),
                'depth_threshold': self.depth_threshold,
                'orb_num_features': self.orb_num_features,
                'orb_scale_factor': self.orb_scale_factor,
                'orb_num_levels': self.orb_num_levels,
                'orb_ini_threshold_fast': self.orb_ini_threshold_fast,
                'orb_min_threshold_fast': self.orb_min_threshold_fast
            }
        )
        result.run_time = time.time() - self._start_time
        self._partial_frame_results = None
        self._start_time = None
        return result
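    # For illustration, the behaviour finish_trial relies on from `find_closest` (defined
    # elsewhere in this module): given a subprocess timestamp and the array of known
    # timestamps, return the nearest known timestamp, or None when nothing is acceptably
    # close. A minimal sketch, assuming a fixed tolerance (the real tolerance may differ):
    #
    #     def find_closest(stamp, known_stamps, tolerance=0.1):
    #         idx = np.argmin(np.abs(known_stamps - stamp))
    #         return known_stamps[idx] if abs(known_stamps[idx] - stamp) <= tolerance else None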
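    # Typical usage, following the docstring above (illustrative; the build_vocabulary
    # arguments are placeholders for values obtained from the path manager):
    #
    #     system = OrbSlam2.get_instance(mode=SensorMode.MONOCULAR)
    #     system.save()                                          # save straight away
    #     system.build_vocabulary(image_sources, output_folder)  # then build the vocabulary
    #     system.save()                                          # and re-save before using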
obj = cls( mode=mode, vocabulary_file=vocabulary_file, vocabulary_branching_factor=int(vocabulary_branching_factor), vocabulary_depth=int(vocabulary_depth), vocabulary_seed=int(vocabulary_seed), depth_threshold=float(depth_threshold), orb_num_features=int(orb_num_features), orb_scale_factor=float(orb_scale_factor), orb_num_levels=int(orb_num_levels), orb_ini_threshold_fast=int(orb_ini_threshold_fast), orb_min_threshold_fast=int(orb_min_threshold_fast) ) return obj def save_settings(self): if self._settings_file is None: if self._temp_folder is None: raise RuntimeError("Cannot save settings, no configured temporary directory") if self._intrinsics is None: raise RuntimeError("Cannot save settings without the camera intrinsics") # Build the settings object orbslam_settings = { 'Camera': { # Camera calibration and distortion parameters (OpenCV) # Most of these get overridden with the camera intrinsics at the start of the run. 'fx': self._intrinsics.fx, 'fy': self._intrinsics.fy, 'cx': self._intrinsics.cx, 'cy': self._intrinsics.cy, 'k1': self._intrinsics.k1, 'k2': self._intrinsics.k2, 'p1': self._intrinsics.p1, 'p2': self._intrinsics.p2, 'k3': self._intrinsics.k3, 'width': self._intrinsics.width, 'height': self._intrinsics.height, # Camera frames per second 'fps': self._framerate, # Color order of the images (0: BGR, 1: RGB. It is ignored if images are grayscale) # All the images in this system will be greyscale anyway 'RGB': 1 }, # Close/Far threshold. Baseline times. I don't know what this does. 'ThDepth': self.depth_threshold, # Depthmap values factor (all my depth is in meters, rescaling is handled elsewhere) 'DepthMapFactor': 1.0, 'ORBextractor': { # ORB Extractor: Number of features per image 'nFeatures': self.orb_num_features, # ORB Extractor: Scale factor between levels in the scale pyramid 'scaleFactor': self.orb_scale_factor, # ORB Extractor: Number of levels in the scale pyramid 'nLevels': self.orb_num_levels, # ORB Extractor: Fast threshold # Image is divided in a grid. At each cell FAST are extracted imposing a minimum response. # Firstly we impose iniThFAST. 
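    # For illustration, what `dump_config` (defined elsewhere in this module) is assumed to
    # do with the settings dict above: flatten the nested sections into the dotted keys that
    # ORB_SLAM2 reads (e.g. 'Camera.fx'), written under the '%YAML:1.0' header that OpenCV's
    # FileStorage parser expects. A minimal sketch, not the real implementation:
    #
    #     def dump_config(path, settings):
    #         def flatten(prefix, mapping):
    #             for key, value in mapping.items():
    #                 name = '{0}.{1}'.format(prefix, key) if prefix else key
    #                 if isinstance(value, dict):
    #                     yield from flatten(name, value)
    #                 else:
    #                     yield name, value
    #         with open(path, 'w') as fp:
    #             fp.write('%YAML:1.0\n')
    #             for name, value in flatten('', settings):
    #                 fp.write('{0}: {1}\n'.format(name, value))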
    def save_settings(self):
        if self._settings_file is None:
            if self._temp_folder is None:
                raise RuntimeError("Cannot save settings, no configured temporary directory")
            if self._intrinsics is None:
                raise RuntimeError("Cannot save settings without the camera intrinsics")

            # Build the settings object
            orbslam_settings = {
                'Camera': {
                    # Camera calibration and distortion parameters (OpenCV)
                    # Most of these get overridden with the camera intrinsics at the start of the run
                    'fx': self._intrinsics.fx,
                    'fy': self._intrinsics.fy,
                    'cx': self._intrinsics.cx,
                    'cy': self._intrinsics.cy,
                    'k1': self._intrinsics.k1,
                    'k2': self._intrinsics.k2,
                    'p1': self._intrinsics.p1,
                    'p2': self._intrinsics.p2,
                    'k3': self._intrinsics.k3,
                    'width': self._intrinsics.width,
                    'height': self._intrinsics.height,
                    # Camera frames per second
                    'fps': self._framerate,
                    # Colour order of the images (0: BGR, 1: RGB; ignored if images are greyscale)
                    # All the images in this system will be greyscale anyway
                    'RGB': 1
                },
                # Close/Far threshold, as a multiple of the stereo baseline (ORB_SLAM2's ThDepth)
                'ThDepth': self.depth_threshold,
                # Depth map values factor (all depth values are in metres, rescaling is handled elsewhere)
                'DepthMapFactor': 1.0,
                'ORBextractor': {
                    # ORB Extractor: Number of features per image
                    'nFeatures': self.orb_num_features,
                    # ORB Extractor: Scale factor between levels in the scale pyramid
                    'scaleFactor': self.orb_scale_factor,
                    # ORB Extractor: Number of levels in the scale pyramid
                    'nLevels': self.orb_num_levels,
                    # ORB Extractor: FAST threshold
                    # The image is divided into a grid, and FAST corners are extracted in each cell,
                    # imposing a minimum response. First we impose iniThFAST; if no corners are
                    # detected, we impose the lower value minThFAST.
                    # You can lower these values if your images have low contrast
                    'iniThFAST': self.orb_ini_threshold_fast,
                    'minThFAST': self.orb_min_threshold_fast
                },
                # Viewer configuration expected by ORB_SLAM2
                # Since the viewer is disabled, these values don't matter, but they need to exist
                'Viewer': {
                    'KeyFrameSize': 0.05,
                    'KeyFrameLineWidth': 1,
                    'GraphLineWidth': 0.9,
                    'PointSize': 2,
                    'CameraSize': 0.08,
                    'CameraLineWidth': 3,
                    'ViewpointX': 0,
                    'ViewpointY': -0.7,
                    'ViewpointZ': -1.8,
                    'ViewpointF': 500
                }
            }
            if self.mode is SensorMode.STEREO:
                if self._stereo_baseline is not None:
                    # Stereo baseline times fx
                    orbslam_settings['Camera']['bf'] = float(self._stereo_baseline * self._intrinsics.fx)
                else:
                    raise RuntimeError("Cannot save stereo settings without a stereo baseline")

            # Choose a new settings file, using mkstemp to avoid collisions.
            # mkstemp also returns an open file descriptor, which we close so it isn't leaked.
            fd, settings_path = tempfile.mkstemp(
                prefix='orb-slam2-settings-{0}-'.format(self.pk if self.pk is not None else 'unregistered'),
                suffix='.yaml',
                dir=self._temp_folder
            )
            os.close(fd)
            self._settings_file = Path(settings_path)
            dump_config(self._settings_file, orbslam_settings)

    def remove_settings(self) -> None:
        """
        Get rid of the settings file after creating it using save_settings
        :return:
        """
        if self._settings_file is not None:
            if self._settings_file.exists():
                self._settings_file.unlink()
            self._settings_file = None

    def build_vocabulary(self, image_sources: typing.Iterable[ImageSource], output_folder: Path,
                         force: bool = False, change_threshold: float = 0.6, z_depth: float = 1.0) -> None:
        """
        Construct a vocabulary file
        :param image_sources: The image sources to use to build the vocabulary.
        :param output_folder: A folder to output the vocabulary file to. Get this from the path manager.
        :param force: Build even if the file already exists
        :param change_threshold: The maximum overlap (IoU) between successive views for the new view
        to still be considered distinct. Used to reduce the number of duplicate features given to the vocabulary.
        :param z_depth: The assumed z-depth when reprojecting image frames to work out overlap.
        :return: None
        """
        output_filename = None
        if self.vocabulary_file is not None and len(self.vocabulary_file) > 0:
            # Use the existing filename, within whatever folder, as the filename
            output_filename = self.vocabulary_file.split('/')[-1]
        if output_filename is None or len(output_filename) <= 0:
            if self.pk is not None:
                output_filename = VOCABULARY_FILENAME_TEMPLATE.format(self.pk)
            else:
                raise ValueError("Could not choose a name for the vocabulary file. "
                                 "Please save the model and try again")
        output_path = output_folder / VOCABULARY_FOLDER / output_filename
        if force or not output_path.exists():
            vocab_builder = VocabularyBuilder(
                self.orb_num_features,  # Number of ORB features from the detector
                self.orb_scale_factor,  # Scale factor for the ORB scale pyramid
                self.orb_num_levels,    # Number of levels in the ORB scale pyramid
                31,  # Edge threshold, matches patch size
                0,   # First level
                2,   # WTA_K=2, that is, use 2 points to determine descriptor elements
                1,   # ScoreType = ORB::FAST_SCORE
                31,  # Patch size, matching the constant in OrbExtractor.cc
                min(self.orb_ini_threshold_fast, self.orb_min_threshold_fast)  # The lower FAST threshold
            )
            images_added = 0
            logging.getLogger(__name__).debug("Building ORB vocab...")
            for image_source in image_sources:
                current_image = None
                for timestamp, image in image_source:
                    # Make sure successive images are at least a little different
                    if current_image is None or \
                            find_percentage_overlap(current_image, image, z_depth) < change_threshold:
                        grey_image = image_utils.convert_to_grey(image.pixels)
                        vocab_builder.add_image(grey_image)
                        current_image = image
                        images_added += 1
            if images_added < 10:
                raise ValueError("Could not find enough images with threshold {0}".format(change_threshold))
            logging.getLogger(__name__).debug(
                "Created ORB vocabulary from {0} images, saving to {1}...".format(images_added, output_path))
            output_path.parent.mkdir(parents=True, exist_ok=True)
            # Construct the vocabulary file
            vocab_builder.build_vocabulary(
                str(output_path),
                branchingFactor=int(self.vocabulary_branching_factor),
                numLevels=int(self.vocabulary_depth),
                seed=int(self.vocabulary_seed)
            )
        # Update the stored vocabulary file to point to the newly built file
        self.vocabulary_file = VOCABULARY_FOLDER + '/' + output_filename

    def _stop_subprocess(self, terminate: bool = False, timeout: float = 5.0) -> None:
        """
        Stop the subprocess, by any means necessary.
        :param terminate: Whether to send SIGTERM before trying to join. Do this when you know it's crashed.
        :param timeout: The time to wait for each join attempt before escalating.
        :return:
        """
        if self._child_process:
            if terminate:
                self._child_process.terminate()
            self._child_process.join(timeout=timeout)
            if not terminate and self._child_process.is_alive():
                # We didn't terminate before, but we've been unable to join, send SIGTERM
                self._child_process.terminate()
                self._child_process.join(timeout=timeout)
            if self._child_process.is_alive():
                # We've timed out after a terminate, kill it with fire
                self._child_process.kill()
            self._child_process = None
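# For illustration only: how the methods above fit together in a single trial. The
# image_source interface follows its use in build_vocabulary (iterating yields
# (timestamp, image) pairs); everything else is from the methods above. This sketch is
# hypothetical and not part of the real benchmark harness, which drives systems via run_task.
def _run_single_trial_sketch(system: 'OrbSlam2', image_source, path_manager, average_timestep: float):
    system.resolve_paths(path_manager)                        # locate the vocabulary on disk
    system.set_camera_intrinsics(image_source.camera_intrinsics, average_timestep)
    system.start_trial(ImageSequenceType.SEQUENTIAL)          # spawns the ORB-SLAM subprocess
    for timestamp, image in image_source:
        system.preload_image_data(image)                      # force-load the pixel data we need
        system.process_image(image, timestamp)                # queue the frame to the subprocess
    return system.finish_trial()                              # join the subprocess, collect results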