def decode(self, tf_example_string_tensor):
  """Decodes serialized tensorflow example and returns a tensor dictionary.

  Args:
    tf_example_string_tensor: a string tensor holding a serialized tensorflow
      example proto.

  Returns:
    A dictionary of the following tensors.
    fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
      containing image.
    fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
      shape [2] containing shape of the image.
    fields.InputDataFields.source_id - string tensor containing original
      image id.
    fields.InputDataFields.key - string tensor with unique sha256 hash key.
    fields.InputDataFields.filename - string tensor with original dataset
      filename.
    fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
      [None, 4] containing box corners.
    fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
      [None] containing classes for the boxes.
    fields.InputDataFields.groundtruth_weights - 1D float32 tensor of shape
      [None] indicating the weights of groundtruth boxes.
    fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
      [None] containing object mask area in pixel squared.
    fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
      [None] indicating if the boxes enclose a crowd.

  Optional:
    fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of
      shape [None] indicating if a class is present in the image (1.0) or a
      class is not present in the image (0.0).
    fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
      shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
      is width; 3rd dim is the number of additional channels.
    fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
      [None] indicating if the boxes represent `difficult` instances.
    fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
      [None] indicating if the boxes represent `group_of` instances.
    fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of shape
      [None, num_keypoints, 2] containing keypoints, where the coordinates of
      the keypoints are ordered (y, x).
    fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool tensor
      of shape [None, num_keypoints] containing keypoint visibilities.
    fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
      shape [None, None, None] containing instance masks.
    fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape
      [None] containing classes for the boxes.
    fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
      [None * num_classes] containing flattened multiclass scores for
      groundtruth boxes.
    fields.InputDataFields.context_features - 1D float32 tensor of shape
      [context_feature_length * num_context_features]
    fields.InputDataFields.context_feature_length - int32 tensor specifying
      the length of each feature in context_features
  """
  serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
  decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
                                                  self.items_to_handlers)
  keys = decoder.list_items()
  tensors = decoder.decode(serialized_example, items=keys)
  tensor_dict = dict(zip(keys, tensors))
  is_crowd = fields.InputDataFields.groundtruth_is_crowd
  tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
  tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
  tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
      tensor_dict[fields.InputDataFields.image])[:2]

  if fields.InputDataFields.image_additional_channels in tensor_dict:
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    channels = tf.squeeze(channels, axis=3)
    channels = tf.transpose(channels, perm=[1, 2, 0])
    tensor_dict[fields.InputDataFields.image_additional_channels] = channels

  def default_groundtruth_weights():
    return tf.ones(
        [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
        dtype=tf.float32)

  tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
      tf.greater(
          tf.shape(
              tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
          0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
      default_groundtruth_weights)

  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    # Set all keypoints that are not labeled to NaN.
    gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
    gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
    visibilities_tiled = tf.tile(
        tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2])
    tensor_dict[gt_kpt_fld] = tf.where(
        visibilities_tiled, tensor_dict[gt_kpt_fld],
        np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

  if self._expand_hierarchy_labels:
    input_fields = fields.InputDataFields
    image_classes, image_confidences = self._expand_image_label_hierarchy(
        tensor_dict[input_fields.groundtruth_image_classes],
        tensor_dict[input_fields.groundtruth_image_confidences])
    tensor_dict[input_fields.groundtruth_image_classes] = image_classes
    tensor_dict[input_fields.groundtruth_image_confidences] = (
        image_confidences)

    box_fields = [
        fields.InputDataFields.groundtruth_group_of,
        fields.InputDataFields.groundtruth_is_crowd,
        fields.InputDataFields.groundtruth_difficult,
        fields.InputDataFields.groundtruth_area,
        fields.InputDataFields.groundtruth_boxes,
        fields.InputDataFields.groundtruth_weights,
    ]

    def expand_field(field_name):
      return self._expansion_box_field_labels(
          tensor_dict[input_fields.groundtruth_classes],
          tensor_dict[field_name])

    # pylint: disable=cell-var-from-loop
    for field in box_fields:
      if field in tensor_dict:
        tensor_dict[field] = tf.cond(
            tf.size(tensor_dict[field]) > 0, lambda: expand_field(field),
            lambda: tensor_dict[field])
    # pylint: enable=cell-var-from-loop

    tensor_dict[input_fields.groundtruth_classes] = (
        self._expansion_box_field_labels(
            tensor_dict[input_fields.groundtruth_classes],
            tensor_dict[input_fields.groundtruth_classes], True))

  if fields.InputDataFields.groundtruth_group_of in tensor_dict:
    group_of = fields.InputDataFields.groundtruth_group_of
    tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)

  if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
    tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
        tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
        dtype=tf.int32)
    tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
        tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
        dtype=tf.int32)

  return tensor_dict
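
# Minimal usage sketch, not part of the decoder above. It assumes the
# enclosing class is named `TfExampleDecoder` and can be constructed with
# default arguments; adjust to the actual class name and constructor in this
# module.
def _demo_decode_single_example(serialized_example_bytes):
  # Build a decoder and feed it one serialized tf.Example proto as a scalar
  # string tensor, as `decode()` expects.
  decoder = TfExampleDecoder()  # Assumed class name.
  tensor_dict = decoder.decode(tf.constant(serialized_example_bytes))
  # `decode()` returns dense tensors keyed by `fields.InputDataFields` names,
  # e.g. the uint8 image and the [num_boxes, 4] box corners.
  return (tensor_dict[fields.InputDataFields.image],
          tensor_dict[fields.InputDataFields.groundtruth_boxes])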
def _parse_single_example(example, options):
  """Parses a single tf.Example proto.

  Args:
    example: An Example proto.
    options: An instance of reader_pb2.Reader.

  Returns:
    A dictionary indexed by tensor name.
  """
  # Initialize `keys_to_features`.
  keys_to_features = {
      TFExampleFields.img_id:
          tf.io.FixedLenFeature([], tf.string),
      TFExampleFields.annot_id:
          tf.io.FixedLenFeature([], tf.string),
      TFExampleFields.answer_label:
          tf.io.FixedLenFeature([], tf.int64),
      TFExampleFields.img_bbox_label:
          tf.io.VarLenFeature(tf.string),
      TFExampleFields.img_bbox_score:
          tf.io.VarLenFeature(tf.float32),
      TFExampleFields.img_bbox_feature:
          tf.io.VarLenFeature(tf.float32),
      TFExampleFields.question:
          tf.io.VarLenFeature(tf.string),
      TFExampleFields.question_tag:
          tf.io.VarLenFeature(tf.int64),
  }
  for bbox_key in TFExampleFields.img_bbox_field_keys:
    bbox_field = os.path.join(TFExampleFields.img_bbox_scope, bbox_key)
    keys_to_features[bbox_field] = tf.io.VarLenFeature(tf.float32)
  for i in range(1, 1 + NUM_CHOICES):
    keys_to_features.update({
        TFExampleFields.cls_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32),
        TFExampleFields.question_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32),
        TFExampleFields.answer_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
        TFExampleFields.answer_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
        TFExampleFields.answer_choice_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32)
    })

  # Initialize `items_to_handlers`.
  items_to_handlers = {
      InputFields.img_id:
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_id,
                                   default_value=''),
      InputFields.annot_id:
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.annot_id,
                                   default_value=''),
      InputFields.answer_label:
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.answer_label,
                                   default_value=-1),
      InputFields.object_bboxes:
          tfexample_decoder.BoundingBox(
              keys=TFExampleFields.img_bbox_field_keys,
              prefix=TFExampleFields.img_bbox_scope),
      InputFields.object_labels:
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_label,
                                   default_value=''),
      InputFields.object_scores:
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_score,
                                   default_value=0),
      InputFields.question:
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.question,
                                   default_value=PAD),
      InputFields.question_tag:
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.question_tag,
                                   default_value=-1),
      TFExampleFields.img_bbox_feature:
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_feature,
                                   default_value=0),
  }
  for i in range(1, 1 + NUM_CHOICES):
    tensor_key = TFExampleFields.cls_bert + '_%i' % i
    items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
        tensor_key=tensor_key, default_value=0)
    tensor_key = TFExampleFields.question_bert + '_%i' % i
    items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
        tensor_key=tensor_key, default_value=0)
    tensor_key = TFExampleFields.answer_choice + '_%i' % i
    items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
        tensor_key=tensor_key, default_value=PAD)
    tensor_key = TFExampleFields.answer_choice_tag + '_%i' % i
    items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
        tensor_key=tensor_key, default_value=-1)
    tensor_key = TFExampleFields.answer_choice_bert + '_%i' % i
    items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
        tensor_key=tensor_key, default_value=0)

  if options.decode_jpeg:
    keys_to_features.update({
        TFExampleFields.img_encoded: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.img_format: tf.io.FixedLenFeature([], tf.string),
    })
    items_to_handlers.update({
        InputFields.img_data:
            tfexample_decoder.Image(image_key=TFExampleFields.img_encoded,
                                    format_key=TFExampleFields.img_format,
                                    shape=None)
    })

  # Decode example.
  example_decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                       items_to_handlers)
  output_keys = example_decoder.list_items()
  output_tensors = example_decoder.decode(example)
  output_tensors = [
      x if x.dtype != tf.int64 else tf.cast(x, tf.int32)
      for x in output_tensors
  ]
  decoded_example = dict(zip(output_keys, output_tensors))
  return _update_decoded_example(decoded_example, options)
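
# Minimal usage sketch, not part of the reader above. It assumes `reader_pb2`
# is imported in this module and that `example` is the scalar string tensor
# holding one serialized tf.Example record, which is what the slim-style
# decoder consumes.
def _demo_parse(serialized_example_tensor):
  options = reader_pb2.Reader()
  options.decode_jpeg = False  # Skip JPEG decoding; keep only text/box fields.
  # Returns the dictionary produced by `_update_decoded_example`, with all
  # int64 tensors already cast to int32.
  return _parse_single_example(serialized_example_tensor, options)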
def _parse_single_example(example, options):
  """Parses a single tf.Example proto.

  Args:
    example: An Example proto.
    options: An instance of reader_pb2.Reader.

  Returns:
    A dictionary indexed by tensor name.
  """
  ###################################
  # Initialize `keys_to_features`.
  ###################################
  keys_to_features = {
      TFExampleFields.annot_id:
          tf.io.FixedLenFeature([], tf.string),
      TFExampleFields.img_id:
          tf.io.FixedLenFeature([], tf.string),
      TFExampleFields.img_encoded:
          tf.io.FixedLenFeature([], tf.string),
      TFExampleFields.img_format:
          tf.io.FixedLenFeature([], tf.string),
      TFExampleFields.answer_label:
          tf.io.FixedLenFeature([], tf.int64),
      TFExampleFields.rationale_label:
          tf.io.FixedLenFeature([], tf.int64),
      TFExampleFields.detection_classes:
          tf.io.VarLenFeature(tf.string),
      TFExampleFields.detection_scores:
          tf.io.VarLenFeature(tf.float32),
      TFExampleFields.detection_boxes_ymin:
          tf.io.VarLenFeature(tf.float32),
      TFExampleFields.detection_boxes_ymax:
          tf.io.VarLenFeature(tf.float32),
      TFExampleFields.detection_boxes_xmin:
          tf.io.VarLenFeature(tf.float32),
      TFExampleFields.detection_boxes_xmax:
          tf.io.VarLenFeature(tf.float32),
      TFExampleFields.question:
          tf.io.VarLenFeature(tf.string),
      TFExampleFields.question_tag:
          tf.io.VarLenFeature(tf.int64),
  }

  # Answer and rationale choices.
  for i in range(NUM_CHOICES):
    keys_to_features.update({
        TFExampleFields.answer_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
        TFExampleFields.answer_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
        TFExampleFields.rationale_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
        TFExampleFields.rationale_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
    })

  ###################################
  # Initialize `items_to_handlers`.
  ###################################
  items_to_handlers = {
      'annot_id':
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.annot_id,
                                   default_value=''),
      'img_id':
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_id,
                                   default_value=''),
      'img_data':
          tfexample_decoder.Image(image_key=TFExampleFields.img_encoded,
                                  format_key=TFExampleFields.img_format,
                                  shape=None),
      'answer_label':
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.answer_label,
                                   default_value=-1),
      'rationale_label':
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.rationale_label,
                                   default_value=-1),
      'detection_boxes':
          tfexample_decoder.BoundingBox(
              keys=TFExampleFields.detection_boxes_keys,
              prefix=TFExampleFields.detection_boxes_scope),
      'detection_classes':
          tfexample_decoder.Tensor(
              tensor_key=TFExampleFields.detection_classes,
              default_value=PAD),
      'detection_scores':
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.detection_scores,
                                   default_value=0),
      'question':
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.question,
                                   default_value=PAD),
      'question_tag':
          tfexample_decoder.Tensor(tensor_key=TFExampleFields.question_tag,
                                   default_value=-1),
  }

  # Answer and rationale choices.
  for i in range(NUM_CHOICES):
    items_to_handlers['answer_choice_%i' % i] = tfexample_decoder.Tensor(
        tensor_key='answer_choice_%i' % i, default_value=PAD)
    items_to_handlers['answer_choice_tag_%i' % i] = tfexample_decoder.Tensor(
        tensor_key='answer_choice_tag_%i' % i, default_value=-1)
    items_to_handlers['rationale_choice_%i' % i] = tfexample_decoder.Tensor(
        tensor_key='rationale_choice_%i' % i, default_value=PAD)
    items_to_handlers['rationale_choice_tag_%i' % i] = tfexample_decoder.Tensor(
        tensor_key='rationale_choice_tag_%i' % i, default_value=-1)

  # Decode example.
  example_decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                       items_to_handlers)
  output_keys = example_decoder.list_items()
  output_tensors = example_decoder.decode(example)
  output_tensors = [
      x if x.dtype != tf.int64 else tf.cast(x, tf.int32)
      for x in output_tensors
  ]
  decoded_example = dict(zip(output_keys, output_tensors))
  return _update_decoded_example(decoded_example, options)
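
# Illustrative pipeline sketch, not part of the reader above. Assumptions:
# the input files are TFRecords of serialized tf.Example protos, and
# `_parse_single_example` accepts one serialized record per call as shown in
# the sketch after the previous parser. The int64 -> int32 cast performed
# above means downstream ops see only int32 integer tensors.
def _demo_build_dataset(filenames, options):
  dataset = tf.data.TFRecordDataset(filenames)
  # Map the parser over each serialized record.
  return dataset.map(
      lambda serialized: _parse_single_example(serialized, options),
      num_parallel_calls=tf.data.experimental.AUTOTUNE)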