def fn():
  """Applies `valid_mask` to all object tensors, then computes the corner loss.

  Operates on the enclosing scope's `inputs`, `outputs`, `valid_mask`,
  `loss_type`, `is_balanced` and `delta` (closure variables); object fields in
  `inputs` and `outputs` are masked in place before the loss is evaluated.
  """
  # Mask input and output object tensors down to the valid entries.
  masked_dicts = (
      (inputs, standard_fields.get_input_object_fields()),
      (outputs, standard_fields.get_output_object_fields()),
  )
  for tensor_dict, object_fields in masked_dicts:
    for field in object_fields:
      if field in tensor_dict:
        tensor_dict[field] = tf.boolean_mask(tensor_dict[field], valid_mask)
  in_fields = standard_fields.InputDataFields
  out_fields = standard_fields.DetectionResultFields
  return _box_corner_distance_loss(
      loss_type=loss_type,
      is_balanced=is_balanced,
      input_boxes_length=inputs[in_fields.objects_length],
      input_boxes_height=inputs[in_fields.objects_height],
      input_boxes_width=inputs[in_fields.objects_width],
      input_boxes_center=inputs[in_fields.objects_center],
      input_boxes_rotation_matrix=inputs[in_fields.objects_rotation_matrix],
      input_boxes_instance_id=inputs[in_fields.objects_instance_id],
      output_boxes_length=outputs[out_fields.objects_length],
      output_boxes_height=outputs[out_fields.objects_height],
      output_boxes_width=outputs[out_fields.objects_width],
      output_boxes_center=outputs[out_fields.objects_center],
      output_boxes_rotation_matrix=outputs[out_fields.objects_rotation_matrix],
      delta=delta)
def get_batch_size_1_input_objects(inputs, b):
  """Returns input dictionary containing tensors with batch size of 1.

  Note that this function only applies its example selection to the object
  tensors.

  Args:
    inputs: A dictionary of tf.Tensors with our input data.
    b: Example index in the batch.

  Returns:
    inputs_1: A dictionary of tf.Tensors with batch size of one.
  """
  # Slice example `b` out of every object tensor that is present.
  return {
      field: inputs[field][b]
      for field in standard_fields.get_input_object_fields()
      if field in inputs
  }
def split_inputs(inputs, input_field_mapping_fn, image_preprocess_fn_dic,
                 images_points_correspondence_fn):
  """Splits inputs to view_image_inputs, view_indices_2d_inputs, mesh_inputs.

  Args:
    inputs: Input dictionary.
    input_field_mapping_fn: A function that maps the input fields to the
      fields expected by object detection pipeline.
    image_preprocess_fn_dic: A dictionary of image preprocessing functions.
    images_points_correspondence_fn: A function that returns image and points
      correspondences.

  Returns:
    view_image_inputs: A dictionary containing image inputs.
    view_indices_2d_inputs: A dictionary containing indices 2d inputs.
    mesh_inputs: A dictionary containing mesh inputs.
    object_inputs: A dictionary containing object inputs.
    non_tensor_inputs: Other inputs.
  """
  # The five result dictionaries start empty and are populated below.
  view_image_inputs = {}
  view_indices_2d_inputs = {}
  mesh_inputs = {}
  object_inputs = {}
  non_tensor_inputs = {}
  if image_preprocess_fn_dic is None:
    image_preprocess_fn_dic = {}

  # Acquire point / image correspondences.
  if images_points_correspondence_fn is not None:
    fn_outputs = images_points_correspondence_fn(inputs)
    # Copy per-point tensors from the correspondence outputs into `inputs`
    # under their standard field names.
    point_key_map = (
        ('points_position', standard_fields.InputDataFields.point_positions),
        ('points_intensity',
         standard_fields.InputDataFields.point_intensities),
        ('points_elongation',
         standard_fields.InputDataFields.point_elongations),
        ('points_normal', standard_fields.InputDataFields.point_normals),
        ('points_color', standard_fields.InputDataFields.point_colors),
    )
    for source_key, standard_key in point_key_map:
      if source_key in fn_outputs:
        inputs[standard_key] = fn_outputs[source_key]
    if 'view_images' in fn_outputs:
      # Images must be [batch, height, width, channels].
      for key in sorted(fn_outputs['view_images']):
        if len(fn_outputs['view_images'][key].shape) != 4:
          raise ValueError(('%s image should have rank 4.' % key))
      view_image_inputs = fn_outputs['view_images']
    if 'view_indices_2d' in fn_outputs:
      # 2d indices must be rank 3.
      for key in sorted(fn_outputs['view_indices_2d']):
        if len(fn_outputs['view_indices_2d'][key].shape) != 3:
          raise ValueError(('%s indices_2d should have rank 3.' % key))
      view_indices_2d_inputs = fn_outputs['view_indices_2d']

  if input_field_mapping_fn is not None:
    inputs = input_field_mapping_fn(inputs)

  # Route every entry of `inputs` into exactly one of the mesh, object or
  # non-tensor buckets based on the standard field lists.
  mesh_keys = {
      key for key in standard_fields.get_input_point_fields() if key in inputs
  }
  object_keys = {
      key for key in standard_fields.get_input_object_fields()
      if key in inputs
  }
  for key, value in inputs.items():
    if key in mesh_keys:
      mesh_inputs[key] = value
    elif key in object_keys:
      object_inputs[key] = value
    else:
      non_tensor_inputs[key] = value

  logging.info('view image inputs')
  logging.info(view_image_inputs)
  logging.info('view indices 2d inputs')
  logging.info(view_indices_2d_inputs)
  logging.info('mesh inputs')
  logging.info(mesh_inputs)
  logging.info('object inputs')
  logging.info(object_inputs)
  logging.info('non_tensor_inputs')
  logging.info(non_tensor_inputs)
  return (view_image_inputs, view_indices_2d_inputs, mesh_inputs,
          object_inputs, non_tensor_inputs)
def prepare_kitti_dataset(inputs, valid_object_classes=None):
  """Maps the fields from loaded input to standard fields.

  Args:
    inputs: A dictionary of input tensors.
    valid_object_classes: List of valid object classes. if None, it is ignored.

  Returns:
    A dictionary of input tensors with standard field names.
  """
  prepared_inputs = {}
  prepared_inputs[standard_fields.InputDataFields.point_positions] = inputs[
      standard_fields.InputDataFields.point_positions]
  prepared_inputs[standard_fields.InputDataFields.point_intensities] = inputs[
      standard_fields.InputDataFields.point_intensities]
  prepared_inputs[standard_fields.InputDataFields
                  .camera_intrinsics] = inputs['cameras/cam02/intrinsics/K']
  prepared_inputs[standard_fields.InputDataFields.
                  camera_rotation_matrix] = inputs['cameras/cam02/extrinsics/R']
  prepared_inputs[standard_fields.InputDataFields
                  .camera_translation] = inputs['cameras/cam02/extrinsics/t']
  prepared_inputs[standard_fields.InputDataFields
                  .camera_image] = inputs['cameras/cam02/image']
  prepared_inputs[standard_fields.InputDataFields
                  .camera_raw_image] = inputs['cameras/cam02/image']
  prepared_inputs[standard_fields.InputDataFields
                  .camera_original_image] = inputs['cameras/cam02/image']
  if 'scene_name' in inputs and 'frame_name' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.camera_image_name] = tf.strings.join(
            [inputs['scene_name'], inputs['frame_name']], separator='_')
  if 'objects/pose/R' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .objects_rotation_matrix] = inputs['objects/pose/R']
  if 'objects/pose/t' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .objects_center] = inputs['objects/pose/t']
  if 'objects/shape/dimension' in inputs:
    # Columns of the dimension tensor are (length, width, height); each is
    # reshaped to a [num_objects, 1] tensor.
    prepared_inputs[
        standard_fields.InputDataFields.objects_length] = tf.reshape(
            inputs['objects/shape/dimension'][:, 0], [-1, 1])
    prepared_inputs[standard_fields.InputDataFields.objects_width] = tf.reshape(
        inputs['objects/shape/dimension'][:, 1], [-1, 1])
    prepared_inputs[
        standard_fields.InputDataFields.objects_height] = tf.reshape(
            inputs['objects/shape/dimension'][:, 2], [-1, 1])
  if 'objects/category/label' in inputs:
    prepared_inputs[standard_fields.InputDataFields.objects_class] = tf.reshape(
        inputs['objects/category/label'], [-1, 1])
  # Guard on objects_class presence: without it, requesting class filtering
  # for a frame that carries no 'objects/category/label' would raise KeyError.
  if (valid_object_classes is not None and
      standard_fields.InputDataFields.objects_class in prepared_inputs):
    # Start from an all-False mask and OR in each requested class.
    valid_objects_mask = tf.cast(
        tf.zeros_like(
            prepared_inputs[standard_fields.InputDataFields.objects_class],
            dtype=tf.int32),
        dtype=tf.bool)
    for object_class in valid_object_classes:
      valid_objects_mask = tf.logical_or(
          valid_objects_mask,
          tf.equal(
              prepared_inputs[standard_fields.InputDataFields.objects_class],
              object_class))
    valid_objects_mask = tf.reshape(valid_objects_mask, [-1])
    # Apply the mask to every object field that is present.
    for key in standard_fields.get_input_object_fields():
      if key in prepared_inputs:
        prepared_inputs[key] = tf.boolean_mask(prepared_inputs[key],
                                               valid_objects_mask)
  return prepared_inputs
def prepare_waymo_open_dataset(inputs,
                               valid_object_classes=None,
                               max_object_distance_from_source=74.88):
  """Maps the fields from loaded input to standard fields.

  Args:
    inputs: A dictionary of input tensors.
    valid_object_classes: List of valid object classes. if None, it is ignored.
    max_object_distance_from_source: Maximum distance of objects from source.
      It will be ignored if None.

  Returns:
    A dictionary of input tensors with standard field names.
  """
  prepared_inputs = {}
  if standard_fields.InputDataFields.point_positions in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_positions] = inputs[
        standard_fields.InputDataFields.point_positions]
  if standard_fields.InputDataFields.point_intensities in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_intensities] = inputs[
        standard_fields.InputDataFields.point_intensities]
  if standard_fields.InputDataFields.point_elongations in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_elongations] = inputs[
        standard_fields.InputDataFields.point_elongations]
  if standard_fields.InputDataFields.point_normals in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_normals] = inputs[
        standard_fields.InputDataFields.point_normals]
  if 'cameras/front/intrinsics/K' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .camera_intrinsics] = inputs['cameras/front/intrinsics/K']
  if 'cameras/front/extrinsics/R' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields
        .camera_rotation_matrix] = inputs['cameras/front/extrinsics/R']
  if 'cameras/front/extrinsics/t' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .camera_translation] = inputs['cameras/front/extrinsics/t']
  if 'cameras/front/image' in inputs:
    # The same front image is exposed under all three standard image fields.
    prepared_inputs[standard_fields.InputDataFields
                    .camera_image] = inputs['cameras/front/image']
    prepared_inputs[standard_fields.InputDataFields
                    .camera_raw_image] = inputs['cameras/front/image']
    prepared_inputs[standard_fields.InputDataFields
                    .camera_original_image] = inputs['cameras/front/image']
  if 'scene_name' in inputs and 'frame_name' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.camera_image_name] = tf.strings.join(
            [inputs['scene_name'], inputs['frame_name']], separator='_')
  if 'objects/pose/R' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .objects_rotation_matrix] = inputs['objects/pose/R']
  if 'objects/pose/t' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .objects_center] = inputs['objects/pose/t']
  if 'objects/shape/dimension' in inputs:
    # Columns of the dimension tensor are (length, width, height); each is
    # reshaped to a [num_objects, 1] tensor.
    prepared_inputs[
        standard_fields.InputDataFields.objects_length] = tf.reshape(
            inputs['objects/shape/dimension'][:, 0], [-1, 1])
    prepared_inputs[standard_fields.InputDataFields.objects_width] = tf.reshape(
        inputs['objects/shape/dimension'][:, 1], [-1, 1])
    prepared_inputs[
        standard_fields.InputDataFields.objects_height] = tf.reshape(
            inputs['objects/shape/dimension'][:, 2], [-1, 1])
  if 'objects/category/label' in inputs:
    prepared_inputs[standard_fields.InputDataFields.objects_class] = tf.reshape(
        inputs['objects/category/label'], [-1, 1])
  # Guard on objects_class presence (mirrors the objects_center guard in the
  # distance filter below): without it, requesting class filtering for a
  # frame without 'objects/category/label' would raise KeyError.
  if (valid_object_classes is not None and
      standard_fields.InputDataFields.objects_class in prepared_inputs):
    # Start from an all-False mask and OR in each requested class.
    valid_objects_mask = tf.cast(
        tf.zeros_like(
            prepared_inputs[standard_fields.InputDataFields.objects_class],
            dtype=tf.int32),
        dtype=tf.bool)
    for object_class in valid_object_classes:
      valid_objects_mask = tf.logical_or(
          valid_objects_mask,
          tf.equal(
              prepared_inputs[standard_fields.InputDataFields.objects_class],
              object_class))
    valid_objects_mask = tf.reshape(valid_objects_mask, [-1])
    for key in standard_fields.get_input_object_fields():
      if key in prepared_inputs:
        prepared_inputs[key] = tf.boolean_mask(prepared_inputs[key],
                                               valid_objects_mask)
  if max_object_distance_from_source is not None:
    if standard_fields.InputDataFields.objects_center in prepared_inputs:
      # Distance is measured in the x-y plane only (first two center coords).
      object_distances = tf.norm(
          prepared_inputs[standard_fields.InputDataFields.objects_center][:,
                                                                          0:2],
          axis=1)
      valid_mask = tf.less(object_distances, max_object_distance_from_source)
      for key in standard_fields.get_input_object_fields():
        if key in prepared_inputs:
          prepared_inputs[key] = tf.boolean_mask(prepared_inputs[key],
                                                 valid_mask)
  return prepared_inputs