def _Extract(self, features): """Returns the laser Tensor.""" p = self.params all_xyzs = [] all_laser_features = [] for feature_name in self.FeatureMap(): laser_data = tf.reshape( _Dense(features[feature_name]), [-1, 3 + p.num_features]) points_xyz = laser_data[..., 0:3] points_feature = laser_data[..., 3:] all_xyzs += [points_xyz] all_laser_features += [points_feature] # Stack all of the points along the major dimension points_xyz = tf.concat(all_xyzs, axis=0) points_feature = tf.concat(all_laser_features, axis=0) if p.max_num_points is not None: npoints = tf.shape(points_xyz)[0] points_xyz = py_utils.PadOrTrimTo(points_xyz, [p.max_num_points, 3]) points_feature = py_utils.PadOrTrimTo(points_feature, [p.max_num_points, p.num_features]) points_padding = 1.0 - py_utils.PadOrTrimTo( tf.ones([npoints]), [p.max_num_points]) else: points_padding = None return py_utils.NestedMap( points_xyz=points_xyz, points_feature=points_feature, points_padding=points_padding)
def _TokenizeOneSentence(i, text, token_ids_ta, target_ids_ta, paddings_ta):
  """Tokenizes a single sentence."""
  if tf.is_tensor(i):
    text_i = tf.gather(text, i)
  else:
    text_i = text[i]
  ids = self._tokenizer.tokenize(text_i).merge_dims(0, -1)
  ids.set_shape([None])

  if append_eos:
    ids = tf.concat([ids, [self.eos_id]], axis=0)
  sos_ids = tf.concat([[self.sos_id], ids], axis=0)
  if p.prepend_sos:
    ids = sos_ids

  # This truncates after the EOS is added, so some sentences might
  # not have EOS at the end.
  token_ids_ta = token_ids_ta.write(
      i, py_utils.PadOrTrimTo(sos_ids, [max_length], 0))
  target_ids_ta = target_ids_ta.write(
      i, py_utils.PadOrTrimTo(ids, [max_length], 0))
  paddings_ta = paddings_ta.write(
      i,
      py_utils.PadOrTrimTo(
          tf.zeros_like(ids, dtype=tf.float32), [max_length], 1.))
  return i + 1, text, token_ids_ta, target_ids_ta, paddings_ta
def _InputBatch(self):
  p = self.params

  @tf.function
  def ReadData():
    x, y = io_ops.restore_v2(p.ckpt, [p.data, p.label], [''] * 2,
                             [p.data_dtype, p.label_dtype])
    # Always convert to float32.
    return tf.cast(x, tf.float32), tf.cast(y, tf.float32)

  # Loads data and label into memory and keeps them around.
  data, label = ops.cached_call(
      f=ReadData.get_concrete_function(), T=[tf.float32, tf.float32])

  b, shape = self.InfeedBatchSize(), list(p.data_shape)
  data = tf.reshape(data, [-1] + shape)
  label = tf.reshape(label, [-1])
  label = py_utils.HasShape(label, [tf.shape(data)[0]])
  sample_ids = ops.random_permutation_sequence(
      num=p.num_samples,
      batch=b,
      repeat=p.repeat,
      seed=p.random_seed if p.random_seed else 0)
  n = tf.shape(sample_ids)[0]
  raw = py_utils.PadOrTrimTo(tf.gather(data, sample_ids), [b] + shape)
  ret = py_utils.NestedMap(
      raw=raw,
      data=self._Preprocess(raw),
      label=py_utils.PadOrTrimTo(tf.gather(label, sample_ids), [b]),
      weight=py_utils.PadOrTrimTo(tf.ones([n], dtype=tf.float32), [b]))
  if not py_utils.use_tpu():
    ret['sample_ids'] = sample_ids
  return ret
def BatchedOrientedNMSIndices(self, bboxes, scores, nms_iou_threshold,
                              score_threshold, max_boxes_per_class):
  """Runs batched version of a Per-Class 3D (7-DOF) Non Max Suppression.

  All outputs have shape [batch_size, num_classes, max_boxes_per_class].

  Args:
    bboxes: A [batch_size, num_boxes, 7] floating point Tensor of bounding
      boxes in [x, y, z, dx, dy, dz, phi] format.
    scores: A [batch_size, num_boxes, num_classes] floating point Tensor
      containing box scores.
    nms_iou_threshold: Either a float or a list of floats of length
      num_classes with the IoU threshold to use when determining whether two
      boxes overlap for purposes of suppression.
    score_threshold: Either a float or a list of floats of length num_classes
      with the score threshold that allows NMS to quickly ignore boxes.
    max_boxes_per_class: An integer scalar with the maximum number of boxes
      per example to emit per class.

  Returns:
    A tuple of 3 tensors:

    - bbox_indices: An int32 Tensor with the indices of the chosen boxes.
      Values are in sort order until the class_idx switches.
    - bbox_scores: A float32 Tensor with the score for each box.
    - valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid.
  """
  bboxes = py_utils.HasShape(bboxes, [-1, -1, 7])
  batch_size, num_boxes = py_utils.GetShape(bboxes, 2)
  scores = py_utils.HasShape(scores, [batch_size, num_boxes, -1])
  _, _, num_classes = py_utils.GetShape(scores)

  # Force the thresholds to be tensors of length num_classes.
  nms_iou_threshold = tf.broadcast_to(
      tf.convert_to_tensor(nms_iou_threshold), [num_classes])
  score_threshold = tf.broadcast_to(
      tf.convert_to_tensor(score_threshold), [num_classes])

  def NMSBody(args):
    per_sample_bboxes, per_sample_scores = args
    indices, scores, mask = ops.non_max_suppression_3d(
        per_sample_bboxes,
        per_sample_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        max_boxes_per_class=max_boxes_per_class)
    return indices, scores, mask

  bbox_indices, bbox_scores, valid_mask = tf.map_fn(
      fn=NMSBody,
      elems=(bboxes, scores),
      dtype=(tf.int32, tf.float32, tf.float32),
      back_prop=False)

  # Force the shapes to be known statically.
  output_shape = [batch_size, num_classes, max_boxes_per_class]
  bbox_indices = py_utils.PadOrTrimTo(bbox_indices, output_shape)
  bbox_scores = py_utils.PadOrTrimTo(bbox_scores, output_shape)
  valid_mask = py_utils.PadOrTrimTo(valid_mask, output_shape)
  return bbox_indices, bbox_scores, valid_mask
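# Illustration (not from the library): how the scalar-or-list thresholds
# above become per-class vectors. `num_classes` is illustrative.
import tensorflow as tf

num_classes = 3
# A scalar threshold is tiled across classes...
iou = tf.broadcast_to(tf.convert_to_tensor(0.3), [num_classes])  # [0.3, 0.3, 0.3]
# ...while a per-class list of length num_classes passes through unchanged.
iou = tf.broadcast_to(tf.convert_to_tensor([0.7, 0.5, 0.6]), [num_classes])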
def _Extract(self, features): """Returns the laser Tensor.""" p = self.params all_xyzs = [] all_laser_features = [] for lidar in p.lidar_names: for ri in p.lidar_returns: feature_name = 'laser_%s_%s' % (lidar, ri) laser_data = tf.reshape( _Dense(features[feature_name]), [-1, 3 + p.num_features]) points_xyz = laser_data[..., 0:3] points_feature = laser_data[..., 3:] all_xyzs += [points_xyz] all_laser_features += [points_feature] # Stack all of the points along the major dimension points_xyz = tf.concat(all_xyzs, axis=0) points_feature = tf.concat(all_laser_features, axis=0) if p.max_num_points is not None: npoints = tf.shape(points_xyz)[0] points_xyz = py_utils.PadOrTrimTo(points_xyz, [p.max_num_points, 3]) points_feature = py_utils.PadOrTrimTo(points_feature, [p.max_num_points, p.num_features]) points_padding = 1.0 - py_utils.PadOrTrimTo( tf.ones([npoints]), [p.max_num_points]) ret = py_utils.NestedMap( points_xyz=points_xyz, points_feature=points_feature) if p.max_num_points is not None: ret.points_padding = points_padding return ret
def _InputBatchFromCKPT(self):
  p = self.params

  @function.Defun()
  def ReadData():
    x, = io_ops.restore_v2(p.ckpt, [p.data], [''], [p.data_dtype])
    return x

  # Loads data into memory and keeps it around.
  data, = py_x_ops.cached_call(f=ReadData, T=[p.data_dtype])

  b = p.batch_size
  total_length = p.data_shape[0]
  total_batches = total_length // b
  total_steps = total_batches // p.num_steps
  left_over = total_batches % p.num_steps > 0
  if left_over:
    total_steps += 1

  if p.eval:
    dataset = tf.data.Dataset.range(total_steps).repeat()
    iterator = dataset.make_one_shot_iterator()
    global_step = iterator.get_next()
  else:
    global_step = py_utils.GetOrCreateGlobalStep() - 1
  batch_id = tf.to_int32(global_step % total_steps)

  data = data[:total_batches * b]
  data = tf.reshape(data, [b, total_batches])

  start = p.num_steps * batch_id
  end = tf.minimum(tf.to_int32(total_batches), start + p.num_steps)
  raw = tf.gather(data, tf.range(start, end, dtype=tf.int32), axis=1,
                  name='ids')
  label_end = tf.minimum(end + 1, tf.to_int32(total_batches))
  label = tf.gather(data, tf.range(start + 1, label_end, dtype=tf.int32),
                    axis=1, name='labels')
  raw = py_utils.PadOrTrimTo(raw, [b, end - start])

  ret = py_utils.NestedMap()
  ret.ids = raw
  ret.labels = py_utils.PadOrTrimTo(label, [b, end - start])
  ret.weights = py_utils.PadOrTrimTo(
      tf.ones([b, label_end - start], dtype=tf.float32), [b, end - start])
  ret.paddings = 1.0 - ret.weights
  ret.word_count = b * (label_end - start - 1)
  ret.take_last_state = batch_id > 0
  return ret
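# Illustration (not from the library): the ids/labels construction above is
# the standard language-model shift-by-one over a flat token stream. Toy
# values assumed:
import tensorflow as tf

stream = tf.range(10)  # tokens 0..9
ids = stream[:-1]      # inputs:  [0, 1, ..., 8]
labels = stream[1:]    # targets: [1, 2, ..., 9], each the next token.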
def _Extract(self, features):
  p = self.params

  # Label values match the proto enum car.open_dataset.Label.Type. The value
  # range is [1..4] for non-background labels.
  labels = tf.cast(_Dense(features['labels']), tf.int32)
  labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
  label_ids = tf.reshape(_Dense(features['label_ids'], ''), [-1])
  label_ids = py_utils.PadOrTrimTo(label_ids, [p.max_num_objects], '')
  bboxes_3d = tf.reshape(_Dense(features['bboxes_3d']), [-1, 7])
  bboxes_3d_mask = tf.ones([tf.shape(bboxes_3d)[0]])
  bboxes_3d_num_points = tf.cast(
      _Dense(features['bboxes_3d_num_points']), tf.int32)
  bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])
  bboxes_3d_mask = py_utils.PadOrTrimTo(bboxes_3d_mask, [p.max_num_objects])
  bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                              [p.max_num_objects])
  label_metadata = tf.reshape(_Dense(features['label_metadata']), [-1, 4])
  label_metadata = py_utils.PadOrTrimTo(label_metadata,
                                        [p.max_num_objects, 4])

  detection_difficulties = py_utils.PadOrTrimTo(
      tf.cast(_Dense(features['detection_difficulties']), tf.int32),
      [p.max_num_objects])
  single_frame_detection_difficulties = py_utils.PadOrTrimTo(
      tf.cast(_Dense(features['single_frame_detection_difficulties']),
              tf.int32), [p.max_num_objects])
  tracking_difficulties = py_utils.PadOrTrimTo(
      tf.cast(_Dense(features['tracking_difficulties']), tf.int32),
      [p.max_num_objects])
  unfiltered_bboxes_3d_mask = bboxes_3d_mask

  if p.filter_labels:
    valid_labels = tf.constant([p.filter_labels])
    bbox_mask = tf.reduce_any(
        tf.equal(tf.expand_dims(labels, 1), valid_labels), axis=1)
    bboxes_3d_mask *= tf.cast(bbox_mask, tf.float32)

  outputs = {
      'labels': labels,
      'label_ids': label_ids,
      'detection_difficulties': detection_difficulties,
      'single_frame_detection_difficulties':
          single_frame_detection_difficulties,
      'tracking_difficulties': tracking_difficulties,
      'bboxes_3d': bboxes_3d,
      'bboxes_3d_mask': bboxes_3d_mask,
      'bboxes_3d_num_points': bboxes_3d_num_points,
      'unfiltered_bboxes_3d_mask': unfiltered_bboxes_3d_mask,
      'speed': label_metadata[:, :2],
      'acceleration': label_metadata[:, 2:],
  }
  return py_utils.NestedMap(outputs)
def _NestedMapFromBatchedOutputs(self, outputs):
  """Create a NestedMap from a tuple of outputs from generic_input_op."""
  batch_size = self.InfeedBatchSize()
  shapes = self.Shape()
  shapes.VLog(0, 'input extractor shape: ')
  flatten_shapes = shapes.Flatten()
  dtypes = self.DType()
  flatten_dtypes = dtypes.FlattenItems()
  assert len(flatten_shapes) == len(outputs), '{} vs. {}'.format(
      len(flatten_shapes), len(outputs))
  assert len(flatten_dtypes) == len(outputs), '{} vs. {}'.format(
      len(flatten_dtypes), len(outputs))

  rets = []
  for (output, (name, dtype), shape) in zip(outputs, flatten_dtypes,
                                            flatten_shapes):
    assert dtype == output.dtype, '{}: {} vs. {}'.format(
        name, dtype, output.dtype)
    # Pad every output to make shapes fixed according to the corresponding
    # declared shape, since the shapes of outputs are lost through
    # generic_input_op.
    try:
      shape.assert_is_fully_defined()
    except ValueError as e:
      raise ValueError('Invalid shape for %s: %s' % (name, e))
    padded = py_utils.PadOrTrimTo(output, [batch_size] + shape.as_list())
    rets += [padded]

  rets = shapes.Pack(rets)
  if py_utils.use_tpu():
    # Drops tf.string tensors, which are not supported on TPUs.
    rets = rets.Filter(lambda x: x.dtype != tf.string)
  return rets
def _Extract(self, features):
  p = self.params

  if p.decode_image:
    raw = features['image/encoded']
    image = tf.image.decode_png(raw, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    # Padding instead of rescaling to preserve the pixel coordinates.
    image = py_utils.PadOrTrimTo(
        image, [self._KITTI_MAX_HEIGHT, self._KITTI_MAX_WIDTH, 3])

  width = tf.reshape(features['image/width'], [1])
  height = tf.reshape(features['image/height'], [1])
  velo_to_image_plane = features['transform/velo_to_image_plane']
  velo_to_camera = features['transform/velo_to_camera']
  camera_to_velo = features['transform/camera_to_velo']

  extracted_features = py_utils.NestedMap(
      width=width,
      height=height,
      velo_to_image_plane=velo_to_image_plane,
      velo_to_camera=velo_to_camera,
      camera_to_velo=camera_to_velo)
  if p.decode_image:
    extracted_features.image = image
  return extracted_features
def _NestedMapFromBatchedOutputs(self, outputs):
  """Create a NestedMap from a tuple of outputs from generic_input_op."""
  batch_size = self.InfeedBatchSize()
  shapes = self.Shape()
  shapes.VLog(0, 'input extractor shape: ')
  flatten_shapes = shapes.Flatten()
  dtypes = self.DType()
  flatten_dtypes = dtypes.FlattenItems()
  assert len(flatten_shapes) == len(outputs), '{} vs. {}'.format(
      len(flatten_shapes), len(outputs))
  assert len(flatten_dtypes) == len(outputs), '{} vs. {}'.format(
      len(flatten_dtypes), len(outputs))

  rets = []
  for (output, (name, dtype), shape) in zip(outputs, flatten_dtypes,
                                            flatten_shapes):
    assert dtype == output.dtype, '{}: {} vs. {}'.format(
        name, dtype, output.dtype)
    # Pad every output to make shapes fixed according to the corresponding
    # declared shape, since the shapes of outputs are lost through
    # generic_input_op.
    try:
      shape.assert_is_fully_defined()
    except ValueError as e:
      raise ValueError('Invalid shape for %s: %s' % (name, e))
    padded = py_utils.PadOrTrimTo(output, [batch_size] + shape.as_list())
    rets += [padded]

  rets = shapes.Pack(rets)
  # String tensors in rets will be filtered out from being sent to the
  # device automatically, and instead will be present in CPU passthrough.
  return rets
def _Extract(self, features):
  p = self.params
  points_xyz = tf.reshape(_Dense(features['pointcloud/xyz']), [-1, 3])
  points_feature = tf.reshape(
      _Dense(features['pointcloud/reflectance']), [-1, p.num_features])

  if p.max_num_points is not None:
    npoints = tf.shape(points_xyz)[0]
    points_xyz = py_utils.PadOrTrimTo(points_xyz, [p.max_num_points, 3])
    points_feature = py_utils.PadOrTrimTo(points_feature,
                                          [p.max_num_points, p.num_features])
    points_padding = 1.0 - py_utils.PadOrTrimTo(
        tf.ones([npoints]), [p.max_num_points])

  ret = py_utils.NestedMap(
      points_xyz=points_xyz, points_feature=points_feature)
  if p.max_num_points is not None:
    ret.points_padding = points_padding
  return ret
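# Illustration (not from the library): the points_padding idiom above. Ones
# mark real points; after padding to max_num_points the tail is zero, so
# 1.0 - mask is exactly 1.0 at padded slots. Toy sizes assumed:
import tensorflow as tf

max_num_points, npoints = 6, 4
keep = tf.pad(tf.ones([npoints]), [[0, max_num_points - npoints]])
points_padding = 1.0 - keep  # [0., 0., 0., 0., 1., 1.]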
def _Extract(self, features): """Returns the laser Tensor.""" p = self.params ret = super()._Extract(features) all_vxyz = [] all_classes = [] for lidar in p.lidar_names: for ri in p.lidar_returns: feature_name = 'laser_%s_%s' % (lidar, ri) laser_data = tf.reshape( _Dense(features[feature_name]), [-1, 3 + p.num_features]) num = py_utils.GetShape(laser_data)[0] # We expect lidar_$lidar_$ri and lidar_$lidar_$ri_flow has # same number of points. feature_name += '_flow' laser_data = tf.reshape(_Dense(features[feature_name]), [num, 3 + 1]) points_vxyz = laser_data[..., 0:3] points_classes = laser_data[..., 3] all_vxyz += [points_vxyz] all_classes += [points_classes] # Stack all of the points along the major dimension points_vxyz = tf.concat(all_vxyz, axis=0) points_class = tf.concat(all_classes, axis=0) # The precomputed class uses -1 to mean 5 in our current code. points_class = tf.where( tf.less(points_class, 0), 5. * tf.ones_like(points_class), points_class) if p.max_num_points is not None: assert 'points_padding' in ret points_vxyz = py_utils.PadOrTrimTo(points_vxyz, [p.max_num_points, 3]) points_class = py_utils.PadOrTrimTo(points_class, [p.max_num_points]) assert 'points_xyz' in ret ret.world_flow = points_vxyz ret.pointwise_class = tf.cast(points_class, tf.int32) return ret
def _TokenizeOneSentence(i, strs, token_ids_ta, target_ids_ta, paddings_ta):
  """Tokenizes a single sentence."""
  ids, _ = self._wpm_encoder.Encode(strs[i])

  if append_eos:
    ids = tf.concat([ids, [self.eos_id]], axis=0)

  # This truncates after the EOS is added, so some sentences might
  # not have </s> at the end.
  token_ids_ta = token_ids_ta.write(
      i,
      py_utils.PadOrTrimTo(
          tf.concat([[self.sos_id], ids], axis=0), [max_length],
          self.eos_id))
  target_ids_ta = target_ids_ta.write(
      i, py_utils.PadOrTrimTo(ids, [max_length], self.eos_id))
  paddings_ta = paddings_ta.write(
      i,
      py_utils.PadOrTrimTo(
          tf.zeros_like(ids, dtype=tf.float32), [max_length], 1.))

  return i + 1, strs, token_ids_ta, target_ids_ta, paddings_ta
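# Illustration (not from the library): the SOS/EOS framing above with toy
# ids in place of the WPM encoder output. Padding uses eos_id for the id
# vectors and 1.0 for the paddings vector, matching the PadOrTrimTo calls
# above.
import tensorflow as tf

sos_id, eos_id, max_length = 1, 2, 6
ids = tf.constant([7, 8, 9])
ids = tf.concat([ids, [eos_id]], axis=0)        # append EOS: [7, 8, 9, 2]
token_ids = tf.concat([[sos_id], ids], axis=0)  # prepend SOS: [1, 7, 8, 9, 2]
pad = max_length - tf.shape(ids)[0]
target_ids = tf.pad(ids, [[0, pad]], constant_values=eos_id)  # [7, 8, 9, 2, 2, 2]
paddings = tf.pad(tf.zeros_like(ids, tf.float32), [[0, pad]],
                  constant_values=1.0)          # [0., 0., 0., 0., 1., 1.]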
def _Extract(self, features): """Returns the image Tensor.""" outputs = py_utils.NestedMap() p = self.params for camera_name in p.camera_names: image_shape = tf.reshape( _Dense(features['image_%s_shape' % camera_name]), [-1]) image_shape = tf.cast(image_shape, tf.int32) if p.decode_image: image = tf.io.decode_png( tf.strings.reduce_join( _Dense(features['image_%s' % camera_name], default_value=''))) image = tf.reshape(image, image_shape) image = py_utils.PadOrTrimTo(image, p.image_shape) intrinsics = tf.reshape( _Dense(features['camera_%s_intrinsics' % camera_name]), [9]) extrinsics = tf.reshape( _Dense(features['camera_%s_extrinsics' % camera_name]), [4, 4]) pose = tf.reshape(_Dense(features['image_%s_pose' % camera_name]), [4, 4]) velocity = tf.reshape( _Dense(features['image_%s_velocity' % camera_name]), [6]) outputs[camera_name] = py_utils.NestedMap() if p.decode_image: outputs[camera_name]['image'] = tf.cast( image, p.image_output_dtype) outputs[camera_name]['image_shape'] = image_shape outputs[camera_name]['intrinsics'] = intrinsics outputs[camera_name]['extrinsics'] = extrinsics outputs[camera_name]['pose'] = pose outputs[camera_name]['velocity'] = velocity outputs[camera_name]['rolling_shutter_direction'] = features[ 'camera_%s_rolling_shutter_direction' % camera_name] for feat in [ 'shutter', 'camera_trigger_time', 'camera_readout_done_time', 'pose_timestamp' ]: outputs[camera_name][feat] = features['image_%s_%s' % (camera_name, feat)] return outputs
def BatchedNMSIndices(self,
                      bboxes,
                      scores,
                      nms_iou_threshold=0.3,
                      score_threshold=0.01,
                      max_num_boxes=None):
  """Batched version of NMSIndices.

  Args:
    bboxes: A [batch_size, num_boxes, 7] floating point Tensor of bounding
      boxes in [x, y, z, dx, dy, dz, phi] format.
    scores: A [batch_size, num_boxes, num_classes] floating point Tensor
      containing box scores.
    nms_iou_threshold: IoU threshold to use when determining whether two
      boxes overlap for purposes of suppression.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes.
    max_num_boxes: The maximum number of boxes per example to emit. If None,
      this value is set to num_boxes from the shape of bboxes.

  Returns:
    The NMS indices and the mask of the padded indices for each example in
    the batch.
  """
  batch_size, num_boxes = py_utils.GetShape(bboxes, 2)
  if max_num_boxes is not None:
    max_output_size = max_num_boxes
  else:
    max_output_size = num_boxes
  output_shape = [batch_size, max_output_size]

  def NMSBody(args):
    bbox, score = args
    return self.NMSIndices(bbox, score, max_output_size, nms_iou_threshold,
                           score_threshold)

  nms_indices, valid_mask = tf.map_fn(
      fn=NMSBody,
      elems=(bboxes, scores),
      dtype=(tf.int32, tf.float32),
      back_prop=False)
  nms_indices = py_utils.PadOrTrimTo(nms_indices, output_shape)
  return nms_indices, valid_mask
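# Illustration (not from the library): tf.map_fn applies a per-example
# function across the leading batch axis of several tensors in lockstep,
# which is how both batched NMS wrappers above run per-sample NMS. A toy
# stand-in for NMSBody (shapes assumed; `dtype=` is the older spelling of
# fn_output_signature):
import tensorflow as tf

boxes = tf.random.uniform([4, 10, 7])   # [batch, num_boxes, 7]
scores = tf.random.uniform([4, 10, 3])  # [batch, num_boxes, num_classes]

def PerExample(args):
  bbox, score = args
  # Pretend "NMS": just report each box's highest-scoring class.
  return tf.cast(tf.argmax(score, axis=-1), tf.int32)

top_class = tf.map_fn(fn=PerExample, elems=(boxes, scores),
                      dtype=tf.int32)   # [4, 10]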
def NestedMapFromBatchedOutputs(self, outputs):
  """Create a NestedMap from a list/tuple of batched outputs.

  Args:
    outputs: A tuple or list of Tensors whose order matches the flattened
      structure of Shape() and DType().

  Returns:
    A NestedMap reconstructing the structure of the output of extractors and
    preprocessors, where each Tensor's shape is statically padded/trimmed to
    match the Shape() specification.

  Raises:
    ValueError: If `outputs` contains a shape that is not fully defined.
    AssertionError: If any Tensor in `outputs` cannot be PadOrTrimTo'd to
      the corresponding Shape() specification.
  """
  batch_size = self.InfeedBatchSize()
  shapes = self.Shape()
  shapes.VLog(0, 'input extractor shape: ')
  flatten_shapes = shapes.Flatten()
  dtypes = self.DType()
  flatten_dtypes = dtypes.FlattenItems()
  assert len(flatten_shapes) == len(outputs), '{} vs. {}'.format(
      len(flatten_shapes), len(outputs))
  assert len(flatten_dtypes) == len(outputs), '{} vs. {}'.format(
      len(flatten_dtypes), len(outputs))

  rets = []
  assertion_errors = []
  for (output, (name, dtype), shape) in zip(outputs, flatten_dtypes,
                                            flatten_shapes):
    assert dtype == output.dtype, '{}: {} vs. {}'.format(
        name, dtype, output.dtype)
    # Pad every output to make shapes fixed according to the corresponding
    # declared shape, since the shapes of outputs are lost through
    # generic_input_op.
    try:
      shape.assert_is_fully_defined()
    except ValueError as e:
      raise ValueError('Invalid shape for %s: %s' % (name, e))
    curr_shape = py_utils.GetShape(output)
    padded_shape = shape.as_list()
    if not self.params.batched_input:
      padded_shape = [batch_size] + padded_shape
    try:
      padded = py_utils.PadOrTrimTo(output, padded_shape)
      rets.append(padded)
    except AssertionError as e:
      assertion_errors += [f'{name}: {e} ({curr_shape} vs. {padded_shape})']

  if assertion_errors:
    raise AssertionError('Mismatched shapes:\n' + '\n'.join(assertion_errors))

  rets = shapes.Pack(rets)
  # String tensors in rets will be filtered out from being sent to the
  # device automatically, and instead will be present in CPU passthrough.
  return rets
def PadOrTrimDimension(tensor: tf.Tensor, new_size: int,
                       axis: int) -> tf.Tensor:
  """Pads or trims `tensor` along `axis` to `new_size`."""
  tensor.shape.with_rank_at_least(abs(axis))
  shape = py_utils.GetShape(tensor)
  # Normalize a negative axis so the slice below drops exactly one dimension
  # (shape[axis + 1:] would wrap around for axis == -1).
  if axis < 0:
    axis += len(shape)
  return py_utils.PadOrTrimTo(tensor,
                              shape[:axis] + [new_size] + shape[axis + 1:])
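# Hedged usage sketch for PadOrTrimDimension above (assumes lingvo's
# py_utils is importable; values illustrative):
import tensorflow as tf
from lingvo.core import py_utils

x = tf.ones([2, 5, 3])
y = PadOrTrimDimension(x, new_size=8, axis=1)  # [2, 8, 3], zero-padded.
z = PadOrTrimDimension(x, new_size=2, axis=2)  # [2, 5, 2], trimmed.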
def _Extract(self, features):
  p = self.params

  source_id = py_utils.HasShape(features['image/source_id'], [])
  xmin = _Dense(features['object/image/bbox/xmin'])
  xmax = _Dense(features['object/image/bbox/xmax'])
  ymin = _Dense(features['object/image/bbox/ymin'])
  ymax = _Dense(features['object/image/bbox/ymax'])

  # 2d bounding box in image coordinates.
  bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
  bboxes_count = tf.shape(bboxes)[0]
  bboxes = py_utils.PadOrTrimTo(bboxes, [p.max_num_objects, 4])
  bboxes_padding = 1.0 - py_utils.PadOrTrimTo(
      tf.ones([bboxes_count]), [p.max_num_objects])

  dim_xyz = tf.reshape(_Dense(features['object/velo/bbox/dim_xyz']), [-1, 3])
  loc_xyz = tf.reshape(_Dense(features['object/velo/bbox/xyz']), [-1, 3])
  phi = tf.reshape(_Dense(features['object/velo/bbox/phi']), [-1, 1])

  # bboxes_3d is in [x, y, z, dx, dy, dz, phi].
  bboxes_3d = tf.concat([loc_xyz, dim_xyz, phi], axis=1)

  cx, cy, _, dx, dy, _, _ = tf.unstack(bboxes_3d, num=7, axis=-1)
  bboxes_td = tf.stack([
      cy - dy / 2,
      cx - dx / 2,
      cy + dy / 2,
      cx + dx / 2,
  ], axis=-1)  # pyformat: disable
  bboxes_td = py_utils.PadOrTrimTo(bboxes_td, [p.max_num_objects, 4])

  has_3d_info = tf.cast(_Dense(features['object/has_3d_info']), tf.float32)
  bboxes_3d_mask = py_utils.PadOrTrimTo(has_3d_info, [p.max_num_objects])
  bboxes_td_mask = bboxes_3d_mask

  # Fill in difficulties from bounding box height, truncation and occlusion.
  bb_height = ymax - ymin
  box_image_height = py_utils.PadOrTrimTo(bb_height, [p.max_num_objects])
  box_image_height *= bboxes_3d_mask

  # 0 to 3 indicating occlusion level. 0 means fully visible, 1 means partly
  # occluded, 2 means largely occluded, 3 means unknown.
  occlusion = tf.reshape(_Dense(features['object/occlusion']), [-1])
  occlusion = tf.cast(occlusion, tf.float32)
  occlusion = py_utils.PadOrTrimTo(occlusion, [p.max_num_objects])
  occlusion *= bboxes_3d_mask

  # Truncation: 0 -> not truncated, 1.0 -> truncated.
  truncation = tf.reshape(_Dense(features['object/truncation']), [-1])
  truncation = py_utils.PadOrTrimTo(truncation, [p.max_num_objects])
  truncation *= bboxes_3d_mask

  difficulties = ComputeKITTIDifficulties(box_image_height, occlusion,
                                          truncation)
  difficulties = py_utils.PadOrTrimTo(difficulties, [p.max_num_objects])

  # Make a batch axis to call BBoxCorners, and take the first result back.
  bbox3d_corners = geometry.BBoxCorners(bboxes_3d[tf.newaxis, ...])[0]

  # Project the 3D bbox to the image plane.
  velo_to_image_plane = features['transform/velo_to_image_plane']
  bboxes3d_proj_to_image_plane = geometry.PointsToImagePlane(
      tf.reshape(bbox3d_corners, [-1, 3]), velo_to_image_plane)

  # Output is [num_objects, 8 corners per object, (x, y)].
  bboxes3d_proj_to_image_plane = tf.reshape(bboxes3d_proj_to_image_plane,
                                            [-1, 8, 2])
  bboxes3d_proj_to_image_plane = py_utils.PadOrTrimTo(
      bboxes3d_proj_to_image_plane, [p.max_num_objects, 8, 2])

  texts = features['object/label'].values
  labels = ops.static_map_string_int(x=texts, keys=self.KITTI_CLASS_NAMES)
  labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
  texts = py_utils.PadOrTrimTo(texts, [p.max_num_objects])

  # Filter labels by setting bboxes_padding, bboxes_3d_mask, and
  # bboxes_td_mask appropriately.
  if p.filter_labels is not None:
    valid_labels = tf.constant([p.filter_labels])
    bbox_mask = tf.reduce_any(
        tf.equal(tf.expand_dims(labels, 1), valid_labels), axis=1)
    bbox_mask = tf.cast(bbox_mask, tf.float32)
    bboxes_padding = 1 - bbox_mask * (1 - bboxes_padding)
    filtered_bboxes_3d_mask = bboxes_3d_mask * bbox_mask
    bboxes_td_mask *= bbox_mask
  else:
    filtered_bboxes_3d_mask = bboxes_3d_mask

  # Placeholder for counting the number of laser points that reside within
  # each 3-d bounding box. This must be filled in outside of this function
  # based on the loaded 3-d laser points.
  bboxes_3d_num_points = tf.zeros([p.max_num_objects], dtype=tf.int32)
  bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                              [p.max_num_objects])

  # Pad bboxes_3d.
  bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])

  return py_utils.NestedMap(
      source_id=source_id,
      bboxes_count=bboxes_count,
      bboxes=bboxes,
      bboxes_padding=bboxes_padding,
      bboxes_3d=bboxes_3d,
      bboxes_3d_mask=filtered_bboxes_3d_mask,
      unfiltered_bboxes_3d_mask=bboxes_3d_mask,
      bboxes3d_proj_to_image_plane=bboxes3d_proj_to_image_plane,
      bboxes_td=bboxes_td,
      bboxes_td_mask=bboxes_td_mask,
      bboxes_3d_num_points=bboxes_3d_num_points,
      labels=labels,
      texts=texts,
      box_image_height=box_image_height,
      occlusion=occlusion,
      truncation=truncation,
      difficulties=difficulties)
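# Illustration (not from the library): the filter_labels masking above is a
# broadcasted membership test. Toy values assumed:
import tensorflow as tf

labels = tf.constant([1, 3, 2, 1])
valid_labels = tf.constant([[1, 2]])  # keep classes 1 and 2.
bbox_mask = tf.reduce_any(
    tf.equal(tf.expand_dims(labels, 1), valid_labels), axis=1)
# -> [True, False, True, True]; cast to float to scale the box masks.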