def dataset_parser(self, value):
    """Decode one serialized ImageNet TFExample into an (image, label) pair.

    Args:
        value: serialized string containing an ImageNet TFExample.

    Returns:
        A tuple (image, label): the result of `self.image_preprocessing_fn`
        applied to the encoded image bytes, and the int32 scalar class label
        shifted down by one.
    """
    feature_spec = {
        'image/encoded': tf.FixedLenFeature((), tf.string, ''),
        'image/format': tf.FixedLenFeature((), tf.string, 'jpeg'),
        'image/class/label': tf.FixedLenFeature([], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label': tf.VarLenFeature(dtype=tf.int64),
    }
    example = tf.parse_single_example(value, feature_spec)

    # The encoded image is forced to a scalar string before preprocessing.
    encoded = tf.reshape(example['image/encoded'], shape=[])
    image = self.image_preprocessing_fn(
        image_bytes=encoded,
        is_training=self.is_training,
        image_size=self.image_size,
        use_bfloat16=self.use_bfloat16)

    # Stored labels are shifted by -1 so that they land in [0, 1000).
    raw_label = tf.reshape(example['image/class/label'], shape=[])
    label = tf.cast(raw_label, dtype=tf.int32) - 1
    return image, label
def example_reading_spec(self):
    """Describe the features read back from each serialized example.

    Returns:
        A (data_fields, data_items_to_decoders) tuple. Both "inputs" and
        "targets" are variable-length float32 features; no decoders are
        supplied (None).
    """
    data_fields = {
        name: tf.VarLenFeature(tf.float32) for name in ("inputs", "targets")
    }
    return (data_fields, None)
def dataset_parser(self, value):
    """Parse an ImageNet record from a serialized string Tensor.

    Args:
        value: serialized string tensor holding one ImageNet TFExample.

    Returns:
        A tuple (image, label): the result of `self.image_preprocessing_fn`
        and a float32 label tensor of shape [1], shifted down by one.
    """
    feature_spec = {
        'image/encoded': tf.FixedLenFeature((), tf.string, ''),
        'image/format': tf.FixedLenFeature((), tf.string, 'jpeg'),
        'image/class/label': tf.FixedLenFeature([], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label': tf.VarLenFeature(dtype=tf.int64),
    }
    example = tf.parse_single_example(value, feature_spec)

    encoded = tf.reshape(example['image/encoded'], shape=[])
    image = self.image_preprocessing_fn(
        image_bytes=encoded,
        is_training=self.is_training,
        use_bfloat16=self.use_bfloat16)

    # Shift labels into [0, 1000) and hand them to the Keras model as
    # float32 with an explicit length-1 dimension.
    label_vector = tf.reshape(example['image/class/label'], shape=[1])
    zero_based = tf.cast(label_vector, dtype=tf.int32) - 1
    label = tf.cast(zero_based, dtype=tf.float32)
    return image, label
def test():
    """Print a handful of decoded records from one training shard.

    Reads a TFRecord shard and a sentencepiece model from
    '/tmp/brokenegg_transformer', decodes the 'inputs' and 'targets' id
    sequences of each record with the subtokenizer and prints them. The
    first target id is printed as the language id; the remaining target ids
    are decoded as the target sentence. The loop stops once `count` exceeds
    10.
    """
    import os

    import tensorflow.compat.v1 as tf

    from brokenegg_transformer.utils import tokenizer

    data_dir = '/tmp/brokenegg_transformer'
    dataset = tf.data.TFRecordDataset(
        os.path.join(data_dir, 'brokenegg-train-00030-of-00030'))
    feature_description = {
        'inputs': tf.VarLenFeature(dtype=tf.int64),
        'targets': tf.VarLenFeature(dtype=tf.int64),
    }
    subtokenizer = tokenizer.Subtokenizer(
        os.path.join(data_dir, 'brokenegg.en-es-ja.spm64k.model'))

    for count, raw_record in enumerate(dataset):
        # Each record is parsed exactly once, directly into sparse tensors;
        # a separate tf.train.Example proto is not needed for printing.
        example = tf.io.parse_single_example(raw_record, feature_description)
        encoded_inputs = tf.sparse.to_dense(example['inputs']).numpy().tolist()
        encoded_targets = tf.sparse.to_dense(
            example['targets']).numpy().tolist()
        print('LANG: %d' % encoded_targets[0])
        print('SRC: %s' % subtokenizer.decode(encoded_inputs))
        print('TGT: %s' % subtokenizer.decode(encoded_targets[1:]))
        if count > 10:
            break
def _parse_record(self, record):
    """Parse an Imagenet record from a tf.Example.

    Args:
        record: serialized tf.Example string tensor.

    Returns:
        A tuple (image, label): the float32 image after `preprocess_image`
        and the int32 scalar class label as stored in the record.
    """
    keys_to_features = {
        "image/encoded": tf.FixedLenFeature((), tf.string, ""),
        "image/format": tf.FixedLenFeature((), tf.string, "jpeg"),
        "image/class/label": tf.FixedLenFeature([], tf.int64, -1),
        "image/class/text": tf.FixedLenFeature([], tf.string, ""),
        "image/object/bbox/xmin": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/ymin": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/xmax": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/ymax": tf.VarLenFeature(dtype=tf.float32),
        "image/object/class/label": tf.VarLenFeature(dtype=tf.int64),
    }
    parsed = tf.parse_single_example(record, keys_to_features)

    image = tf.image.decode_image(
        tf.reshape(parsed["image/encoded"], shape=[]), _NUM_CHANNELS)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Height maps to height and width to width; swapping the two only goes
    # unnoticed while the configured target size is square.
    image = preprocess_image(
        image=image,
        output_height=self._image_height,
        output_width=self._image_width,
        is_training=self._is_training)

    label = tf.cast(
        tf.reshape(parsed["image/class/label"], shape=[]), dtype=tf.int32)
    return image, label
def dataset_parser(self, value):
    """Turn a serialized Imagenet record into (image, one-hot label).

    Args:
        value: serialized tf.Example string tensor.

    Returns:
        A tuple of the image produced by `vgg_preprocessing.preprocess_image`
        at 224x224 and the label one-hot encoded over `_LABEL_CLASSES`.
    """
    feature_spec = {
        "image/encoded": tf.FixedLenFeature((), tf.string, ""),
        "image/format": tf.FixedLenFeature((), tf.string, "jpeg"),
        "image/class/label": tf.FixedLenFeature([], tf.int64, -1),
        "image/class/text": tf.FixedLenFeature([], tf.string, ""),
        "image/object/bbox/xmin": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/ymin": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/xmax": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/ymax": tf.VarLenFeature(dtype=tf.float32),
        "image/object/class/label": tf.VarLenFeature(dtype=tf.int64),
    }
    example = tf.parse_single_example(value, feature_spec)

    encoded = tf.reshape(example["image/encoded"], shape=[])
    decoded = tf.image.decode_image(encoded, _NUM_CHANNELS)
    as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    # TODO(shivaniagrawal): height and width of image from model
    image = vgg_preprocessing.preprocess_image(
        image=as_float,
        output_height=224,
        output_width=224,
        is_training=self.is_training)

    scalar_label = tf.reshape(example["image/class/label"], shape=[])
    label = tf.cast(scalar_label, dtype=tf.int32)
    return image, tf.one_hot(label, _LABEL_CLASSES)
def _count_matrix_input(self, filenames, submatrix_rows, submatrix_cols):
    """Creates ops that read submatrix shards from disk.

    Args:
        filenames: iterable of shard file paths. It is copied before being
            shuffled, so the caller's sequence is left untouched.
        submatrix_rows: number of rows in each submatrix shard.
        submatrix_cols: number of columns in each submatrix shard.

    Returns:
        A tuple (global_row, global_col, count): the shard's global row ids,
        its global column ids, and the dense
        [submatrix_rows, submatrix_cols] count matrix built from the sparse
        entries stored in the shard.
    """
    # Shuffle a private copy rather than reordering the caller's list.
    shuffled_filenames = list(filenames)
    random.shuffle(shuffled_filenames)

    filename_queue = tf.train.string_input_producer(shuffled_filenames)
    reader = tf.WholeFileReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'global_row': tf.FixedLenFeature([submatrix_rows], dtype=tf.int64),
            'global_col': tf.FixedLenFeature([submatrix_cols], dtype=tf.int64),
            'sparse_local_row': tf.VarLenFeature(dtype=tf.int64),
            'sparse_local_col': tf.VarLenFeature(dtype=tf.int64),
            'sparse_value': tf.VarLenFeature(dtype=tf.float32),
        })

    global_row = features['global_row']
    global_col = features['global_col']

    sparse_local_row = features['sparse_local_row'].values
    sparse_local_col = features['sparse_local_col'].values
    sparse_count = features['sparse_value'].values

    # Pair up the local (row, col) coordinates as an [N, 2] index tensor.
    sparse_indices = tf.concat(
        axis=1,
        values=[tf.expand_dims(sparse_local_row, 1),
                tf.expand_dims(sparse_local_col, 1)])
    count = tf.sparse_to_dense(
        sparse_indices, [submatrix_rows, submatrix_cols], sparse_count)

    return global_row, global_col, count
def _parser(serialized_example):
    """Convert one serialized tf.Example into (image, mask) tensors.

    Both features are read as variable-length float32 lists, densified,
    reshaped to the sizes reported by `self.get_input_shapes(params)`, and
    cast to bfloat16 when `params.use_bfloat16` is set. `self` and `params`
    come from the enclosing scope.
    """

    def _densify(tensor):
        # Only sparse tensors need converting; anything else passes through.
        if isinstance(tensor, tf.SparseTensor):
            return tf.sparse_tensor_to_dense(tensor)
        return tensor

    feature_spec = {
        'image/encoded': tf.VarLenFeature(dtype=tf.float32),
        'image/segmentation/mask': tf.VarLenFeature(dtype=tf.float32),
    }
    parsed = tf.parse_example([serialized_example], features=feature_spec)

    image = _densify(parsed['image/encoded'])
    gt_mask = _densify(parsed['image/segmentation/mask'])

    image_size, label_size = self.get_input_shapes(params)
    image = tf.reshape(image, image_size)
    gt_mask = tf.reshape(gt_mask, label_size)

    if params.use_bfloat16:
        image = tf.cast(image, dtype=tf.bfloat16)
        gt_mask = tf.cast(gt_mask, dtype=tf.bfloat16)

    logging.info('debug input %s %s', image, gt_mask)
    return image, gt_mask
def _decode(self, serialized_example):
    """Decode a serialized example into stacked audio, f0 and loudness.

    Args:
        serialized_example: serialized tf.Example string tensor.

    Returns:
        A tuple (audios, f0s, loudness): `audios` stacks the mix and the
        three source signals, `f0s` stacks the three f0 tracks, and
        `loudness` stacks the three loudness tracks.
    """
    # Every feature is a variable-length float32 list. The ordering here is
    # also the order in which the dense tensors are created.
    feature_names = (
        "mix_audio",
        "s1_audio", "s1_f0", "s1_loudness",
        "s2_audio", "s2_f0", "s2_loudness",
        "s3_audio", "s3_f0", "s3_loudness",
    )
    example = tf.parse_single_example(
        serialized_example,
        features={name: tf.VarLenFeature(tf.float32) for name in feature_names},
    )
    dense = {
        name: tf.sparse_tensor_to_dense(example[name]) for name in feature_names
    }

    audios = tf.stack(
        [dense["mix_audio"], dense["s1_audio"], dense["s2_audio"],
         dense["s3_audio"]])
    f0s = tf.stack([dense["s1_f0"], dense["s2_f0"], dense["s3_f0"]])
    loudness = tf.stack(
        [dense["s1_loudness"], dense["s2_loudness"], dense["s3_loudness"]])
    return audios, f0s, loudness
def dataset_parser(self, value):
    """Parse an ImageNet record from a serialized string Tensor.

    Args:
        value: serialized tf.Example string tensor.

    Returns:
        A tuple of the image returned by `self.image_preprocessing_fn` at
        224x224 and the label one-hot encoded over `_LABEL_CLASSES`.
    """
    feature_spec = {
        "image/encoded": tf.FixedLenFeature((), tf.string, ""),
        "image/format": tf.FixedLenFeature((), tf.string, "jpeg"),
        "image/class/label": tf.FixedLenFeature([], tf.int64, -1),
        "image/class/text": tf.FixedLenFeature([], tf.string, ""),
        "image/object/bbox/xmin": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/ymin": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/xmax": tf.VarLenFeature(dtype=tf.float32),
        "image/object/bbox/ymax": tf.VarLenFeature(dtype=tf.float32),
        "image/object/class/label": tf.VarLenFeature(dtype=tf.int64),
    }
    example = tf.parse_single_example(value, feature_spec)

    encoded = tf.reshape(example["image/encoded"], shape=[])
    decoded = tf.image.decode_jpeg(encoded, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    image = self.image_preprocessing_fn(
        image=as_float,
        output_height=224,
        output_width=224,
        is_training=self.is_training,
    )

    scalar_label = tf.reshape(example["image/class/label"], shape=[])
    label = tf.cast(scalar_label, dtype=tf.int32)
    return image, tf.one_hot(label, _LABEL_CLASSES)
def example_reading_spec(self):
    """Describe the features read back from each serialized example.

    Returns:
        A (data_fields, data_items_to_decoders) tuple. All three fields are
        variable-length int64 features; no decoders are supplied (None).
    """
    field_names = ("inputs", "targets", "section_boundaries")
    data_fields = {name: tf.VarLenFeature(tf.int64) for name in field_names}
    return (data_fields, None)
def example_reading_spec(self):
    """Describe the features read back from each serialized example.

    Returns:
        A (data_fields, data_items_to_decoders) tuple. "inputs" and
        "targets" are variable-length int64 features, the two "audio/..."
        entries are scalar int64 features, and no decoders are supplied
        (None).
    """
    feature_spec = {
        "inputs": tf.VarLenFeature(tf.int64),
        "audio/sample_count": tf.FixedLenFeature((), tf.int64),
        "audio/sample_width": tf.FixedLenFeature((), tf.int64),
        "targets": tf.VarLenFeature(tf.int64),
    }
    decoders = None
    return feature_spec, decoders
def example_reading_spec(self):
    """Describe the features read back from each serialized example.

    Returns:
        A (data_fields, data_items_to_decoders) tuple. "waveforms" is a
        variable-length float32 feature, "targets" a variable-length int64
        feature, and no decoders are supplied (None).
    """
    feature_spec = {
        "waveforms": tf.VarLenFeature(tf.float32),
        "targets": tf.VarLenFeature(tf.int64),
    }
    return feature_spec, None
def example_reading_spec(self):
    """Describe the features read back from each serialized example.

    Returns:
        A (data_fields, data_items_to_decoders) tuple. All four fields are
        variable-length int64 features; no decoders are supplied (None).
    """
    field_names = ("inputs", "inputs_extend", "targets", "targets_extend")
    data_fields = {name: tf.VarLenFeature(tf.int64) for name in field_names}
    return (data_fields, None)
def __init__(self):
    """Set up the feature specs and item handlers used for decoding.

    Populates three attributes: `keys_to_context_features` (per-example
    scalar features), `keys_to_features` (the encoded image sequence and
    variable-length annotation features) and `items_to_handlers` (a mapping
    from `fields.InputDataFields` names to decoder handlers).
    """
    context_features = {
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/filename':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/key/sha256':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/source_id':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/height': tf.FixedLenFeature((), tf.int64, 1),
        'image/width': tf.FixedLenFeature((), tf.int64, 1),
    }
    sequence_features = {
        'image/encoded': tf.FixedLenSequenceFeature((), tf.string),
        'bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'bbox/label/index': tf.VarLenFeature(dtype=tf.int64),
        'bbox/label/string': tf.VarLenFeature(tf.string),
        'area': tf.VarLenFeature(tf.float32),
        'is_crowd': tf.VarLenFeature(tf.int64),
        'difficult': tf.VarLenFeature(tf.int64),
        'group_of': tf.VarLenFeature(tf.int64),
    }

    input_fields = fields.InputDataFields
    handlers = {
        input_fields.image:
            tfexample_decoder.Image(
                image_key='image/encoded',
                format_key='image/format',
                channels=3,
                repeated=True),
        input_fields.source_id: tfexample_decoder.Tensor('image/source_id'),
        input_fields.key: tfexample_decoder.Tensor('image/key/sha256'),
        input_fields.filename: tfexample_decoder.Tensor('image/filename'),
        # Object boxes and classes.
        input_fields.groundtruth_boxes: BoundingBoxSequence(prefix='bbox/'),
        input_fields.groundtruth_classes:
            tfexample_decoder.Tensor('bbox/label/index'),
        input_fields.groundtruth_area: tfexample_decoder.Tensor('area'),
        input_fields.groundtruth_is_crowd:
            tfexample_decoder.Tensor('is_crowd'),
        input_fields.groundtruth_difficult:
            tfexample_decoder.Tensor('difficult'),
        input_fields.groundtruth_group_of:
            tfexample_decoder.Tensor('group_of'),
    }

    self.keys_to_context_features = context_features
    self.keys_to_features = sequence_features
    self.items_to_handlers = handlers
def __init__(self, in_file, tokenizer, subject_mention_probability,
             max_qry_length, is_training, entity2id, tfrecord_filename):
    """Initialize dataset.

    Reads examples from `in_file`, optionally pre-shuffles them, writes them
    to `tfrecord_filename` as features, and builds `self.input_fn` for
    reading that file back.

    Args:
        in_file: path handed to `self.read_examples`; also kept as
            `self.gt_file`.
        tokenizer: forwarded to `convert_examples_to_features`.
        subject_mention_probability: unused.
        max_qry_length: length of the fixed-size query id/mask features.
        is_training: whether to shuffle examples and read answer entities.
        entity2id: only its length is used; an identity id mapping of that
            size is forwarded to `convert_examples_to_features`.
        tfrecord_filename: path of the TFRecords file to write and read.
    """
    del subject_mention_probability  # Unused.

    # Only the number of entities matters: the mapping passed on is the
    # identity over range(num_entities).
    identity_entity2id = {idx: idx for idx in range(len(entity2id))}
    del entity2id

    self.gt_file = in_file
    self.max_qry_length = max_qry_length
    self.is_training = is_training
    self.has_bridge = False
    self.num_bridge = 0

    # Load the examples from the JSON file.
    self.examples = self.read_examples(in_file)
    self.num_examples = len(self.examples)

    if is_training:
        # Shuffle up front with a fixed seed so that `input_fn` does not
        # need a very large shuffle buffer.
        random.Random(12345).shuffle(self.examples)

    # Serialize every example to the TFRecords file.
    writer = FeatureWriter(
        filename=tfrecord_filename,
        is_training=self.is_training,
        has_bridge=self.has_bridge)
    convert_examples_to_features(
        examples=self.examples,
        tokenizer=tokenizer,
        max_query_length=self.max_qry_length,
        entity2id=identity_entity2id,
        output_fn=writer.process_feature)
    writer.close()

    # Feature spec used by the input_fn to read the records back.
    query_shape = [self.max_qry_length]
    names_to_features = {
        "qas_ids": tf.FixedLenFeature([], tf.string),
        "qry_input_ids": tf.FixedLenFeature(query_shape, tf.int64),
        "qry_input_mask": tf.FixedLenFeature(query_shape, tf.int64),
        "qry_entity_id": tf.FixedLenFeature([], tf.int64),
    }
    if is_training:
        names_to_features["answer_entities"] = tf.VarLenFeature(tf.int64)
        if self.has_bridge:
            for ii in range(self.num_bridge):
                bridge_key = "bridge_entities_%d" % ii
                names_to_features[bridge_key] = tf.VarLenFeature(tf.int64)

    self.input_fn = input_fn_builder(
        input_file=tfrecord_filename,
        is_training=self.is_training,
        drop_remainder=True,
        names_to_features=names_to_features)
def _parse_example(serialized_example):
    """Extract dense inputs and targets from a serialized tf.Example.

    Args:
        serialized_example: serialized tf.Example string tensor.

    Returns:
        A tuple (inputs, targets) of dense int64 tensors.
    """
    feature_spec = {
        "inputs": tf.VarLenFeature(tf.int64),
        "targets": tf.VarLenFeature(tf.int64),
    }
    sparse = tf.parse_single_example(serialized_example, feature_spec)
    return (
        tf.sparse_tensor_to_dense(sparse["inputs"]),
        tf.sparse_tensor_to_dense(sparse["targets"]),
    )
def example_reading_spec(self):
    """Describe the features read back from each serialized example.

    Returns:
        A (data_fields, data_items_to_decoders) tuple. All five fields are
        variable-length int64 features; no decoders are supplied (None).
    """
    field_names = (
        "all_tags",
        "inputs",
        "input_tags",
        "targets",
        "target_tags",
    )
    data_fields = {name: tf.VarLenFeature(tf.int64) for name in field_names}
    return data_fields, None
def example_reading_spec(self):
    """Describe the features read back from each serialized example.

    Returns:
        A (data_fields, data_items_to_decoders) tuple. The fields are
        'targets' plus one entry per name yielded by `self.score_encoders()`,
        all variable-length int64 features. No decoders are supplied (None)
        because the encodings are read directly as tensors.
    """
    data_fields = {'targets': tf.VarLenFeature(tf.int64)}
    data_fields.update({
        name: tf.VarLenFeature(tf.int64)
        for name, _ in self.score_encoders()
    })
    return data_fields, None
def __init__(self, include_mask=False, regenerate_source_id=False):
    """Define the feature spec used to parse serialized examples.

    Args:
        include_mask: accepted but not used by this constructor.
        regenerate_source_id: accepted but not used by this constructor.
    """
    image_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/height': tf.FixedLenFeature((), tf.int64, -1),
        'image/width': tf.FixedLenFeature((), tf.int64, -1),
    }
    object_features = {
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
    }
    self._keys_to_features = dict(image_features)
    self._keys_to_features.update(object_features)
def decode_dataset(self, serialized_example):
    """Decode a serialized example into a stack of its two sources.

    Args:
        serialized_example: serialized tf.Example string tensor.

    Returns:
        The dense "s1" and "s2" float32 features stacked into one tensor.
    """
    source_names = ("s1", "s2")
    example = tf.parse_single_example(
        serialized_example,
        features={name: tf.VarLenFeature(tf.float32) for name in source_names},
    )
    sources = [tf.sparse_tensor_to_dense(example[name]) for name in source_names]
    return tf.stack(sources)
def example_reading_spec(self):
    """Describe how examples are read, mapping dist_targets onto targets.

    Returns:
        A (data_fields, data_items_to_decoders) tuple. `data_fields` always
        holds "dist_targets" and, when `self.has_inputs` is true, "inputs".
        `data_items_to_decoders` exposes "dist_targets" under the name
        "targets" and registers an "inputs" decoder only when the "inputs"
        feature is actually parsed.
    """
    tensor_decoder = contrib.slim().tfexample_decoder.Tensor

    data_fields = {"dist_targets": tf.VarLenFeature(tf.int64)}
    data_items_to_decoders = {}
    if self.has_inputs:
        data_fields["inputs"] = tf.VarLenFeature(tf.int64)
        # The "inputs" decoder reads the "inputs" feature, so it is only
        # registered when that feature is part of the parsing spec.
        data_items_to_decoders["inputs"] = tensor_decoder("inputs")

    # hack: ignoring true targets and putting dist_targets in targets
    data_items_to_decoders["targets"] = tensor_decoder("dist_targets")
    return (data_fields, data_items_to_decoders)
def dataset_parser(self, serialized_proto):
    """Turn a serialized Imagenet record into an (image, label) pair.

    Args:
        serialized_proto: serialized tf.Example string tensor.

    Returns:
        A tuple (image, label): the output of `preprocess_raw_bytes` (cast
        to bfloat16 when `self.use_bfloat16` is set) and the int32 scalar
        class label as stored in the record.
    """
    feature_spec = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
            tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text':
            tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label': tf.VarLenFeature(dtype=tf.int64),
    }
    features = tf.parse_single_example(serialized_proto, feature_spec)

    bbox = None
    if FLAGS.use_annotated_bbox:

        def _coord_row(key):
            # One [1, num_boxes] row per coordinate.
            return tf.expand_dims(features[key].values, 0)

        xmin = _coord_row('image/object/bbox/xmin')
        ymin = _coord_row('image/object/bbox/ymin')
        xmax = _coord_row('image/object/bbox/xmax')
        ymax = _coord_row('image/object/bbox/ymax')

        # Coordinates are arranged in (y, x) order, then reshaped from
        # [coords, num_boxes] into [1, num_boxes, coords].
        stacked = tf.concat([ymin, xmin, ymax, xmax], 0)
        bbox = tf.transpose(tf.expand_dims(stacked, 0), [0, 2, 1])

    image = preprocess_raw_bytes(
        features['image/encoded'], is_training=self.is_training, bbox=bbox)
    label = tf.cast(
        tf.reshape(features['image/class/label'], shape=[]), dtype=tf.int32)

    if self.use_bfloat16:
        image = tf.cast(image, tf.bfloat16)
    return image, label
def dataset_parser(self, value):
    """Parse an ImageNet record from a serialized string Tensor.

    Args:
        value: serialized tf.Example string tensor.

    Returns:
        A tuple (image, label). The image comes from
        `self.image_preprocessing_fn`, is optionally normalized with
        IMAGENET_MEAN / IMAGENET_STDDEV, and is cast to bfloat16 when
        `self.use_bfloat16` is set. The label is float32: a 1000-way one-hot
        vector when `self.one_hot` is set, otherwise a shape-[1] class
        index. Labels are shifted down by one in both cases.
    """
    feature_spec = {
        'image/encoded': tf.FixedLenFeature((), tf.string, ''),
        'image/format': tf.FixedLenFeature((), tf.string, 'jpeg'),
        'image/class/label': tf.FixedLenFeature([], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label': tf.VarLenFeature(dtype=tf.int64),
    }
    example = tf.parse_single_example(value, feature_spec)

    encoded = tf.reshape(example['image/encoded'], shape=[])
    image = self.image_preprocessing_fn(
        image_bytes=encoded,
        is_training=self.is_training,
        use_bfloat16=self.use_bfloat16,
        image_size=self.image_size,
        resize_method=self.resize_method)

    # Labels are shifted into [0, 1000) and delivered as float32 for the
    # Keras model.
    raw_label = example['image/class/label']
    if self.one_hot:
        # TODO(ywenxu): The number of classes is hard coded for now.
        zero_based = tf.cast(raw_label, tf.int32) - 1
        label = tf.one_hot(zero_based, 1000, dtype=tf.float32)
    else:
        label_vector = tf.reshape(raw_label, shape=[1])
        zero_based = tf.cast(label_vector, dtype=tf.int32) - 1
        label = tf.cast(zero_based, tf.float32)

    if self.normalize_input:
        # Per-channel statistics broadcast over height and width.
        mean = np.reshape(IMAGENET_MEAN, [1, 1, 3])
        stddev = np.reshape(IMAGENET_STDDEV, [1, 1, 3])
        image = (tf.cast(image, tf.float32) - mean) / stddev
        if self.use_bfloat16:
            image = tf.cast(image, tf.bfloat16)

    return image, label
def __init__(
    self,
    include_mask=False,
    # copypara:strip_begin
    include_polygon=False,
    # copypara:strip_end
    regenerate_source_id=False):
    """Store the decoder options and define the parsing feature spec.

    Args:
        include_mask: when true, 'image/object/mask' is parsed as well.
        include_polygon: stored as `self._include_polygon`.
        regenerate_source_id: stored as `self._regenerate_source_id`.
    """
    self._include_mask = include_mask
    # copypara:strip_begin
    self._include_polygon = include_polygon
    # copypara:strip_end
    self._regenerate_source_id = regenerate_source_id

    feature_spec = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/source_id': tf.FixedLenFeature((), tf.string, ''),
        'image/height': tf.FixedLenFeature((), tf.int64, -1),
        'image/width': tf.FixedLenFeature((), tf.int64, -1),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
        'image/object/area': tf.VarLenFeature(tf.float32),
        'image/object/is_crowd': tf.VarLenFeature(tf.int64),
    }
    if include_mask:
        # Masks are stored as a variable-length list of strings.
        feature_spec.update({
            'image/object/mask': tf.VarLenFeature(tf.string),
        })
    self._keys_to_features = feature_spec
def example_reading_spec(self):
    """Define how data is serialized to file and read back.

    Returns:
        data_fields: A dictionary mapping data names to its feature type;
            "inputs" and "targets" are variable-length int64 features.
        data_items_to_decoders: A dictionary mapping data names to TF
            Example decoders, to be used when reading back TF examples from
            disk. Always None here.
    """
    data_fields = {
        name: tf.VarLenFeature(tf.int64) for name in ("inputs", "targets")
    }
    return (data_fields, None)
def __init__(self, include_mask=False, regenerate_source_id=False,
             num_attributes=None):
    """Store the decoder options and define the parsing feature spec.

    Args:
        include_mask: when true, 'image/object/mask' is parsed as well.
        regenerate_source_id: stored as `self._regenerate_source_id`.
        num_attributes: when truthy, 'image/object/attributes/labels' is
            parsed as well.
    """
    self._include_mask = include_mask
    self._regenerate_source_id = regenerate_source_id
    self._num_attributes = num_attributes

    feature_spec = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/source_id': tf.FixedLenFeature((), tf.string, ''),
        'image/height': tf.FixedLenFeature((), tf.int64, -1),
        'image/width': tf.FixedLenFeature((), tf.int64, -1),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
        'image/object/area': tf.VarLenFeature(tf.float32),
        'image/object/is_crowd': tf.VarLenFeature(tf.int64),
    }
    # Optional features are appended only when the matching option is set.
    if include_mask:
        feature_spec['image/object/mask'] = tf.VarLenFeature(tf.string)
    if num_attributes:
        feature_spec['image/object/attributes/labels'] = (
            tf.FixedLenFeature((), tf.string, ''))
    self._keys_to_features = feature_spec
def _parse_example(ex_ser):
    """Parse serialized Example containing Wikipedia article content.

    Args:
        ex_ser: serialized tf.Example string tensor.

    Returns:
        A dict with the values of "url", "title", "section_titles" and
        "section_texts"; "url" and "title" are reduced to their first
        element.
    """
    feature_spec = {
        "url": tf.VarLenFeature(tf.string),
        "title": tf.VarLenFeature(tf.string),
        "section_titles": tf.VarLenFeature(tf.string),
        "section_texts": tf.VarLenFeature(tf.string),
    }
    parsed = tf.parse_single_example(ex_ser, feature_spec)

    # Keep only the values of each sparse feature.
    ex = {name: sparse.values for name, sparse in parsed.items()}
    for single_valued in ("url", "title"):
        ex[single_valued] = ex[single_valued][0]
    return ex
def build_graph(parameters):
    """Build the graph for parse_example tests.

    Args:
        parameters: dict with the keys "feature_dtype", "feature_shape" and
            "is_dense".

    Returns:
        A pair ([input placeholder], [output tensor]). For sparse features
        the output is the `.values` of the parsed sparse tensor.
    """
    dtype = parameters["feature_dtype"]
    shape = parameters["feature_shape"]
    dense = parameters["is_dense"]

    input_value = tf.compat.v1.placeholder(
        dtype=tf.string, name="input", shape=[1])

    if not dense:
        # Sparse
        features = {"x": tf.VarLenFeature(dtype=dtype)}
    else:
        default_value = np.zeros(shape=shape)
        if dtype == tf.string:
            # String features need a string-typed default.
            default_value = np.array(["missing"] * shape[0])
        features = {
            "x": tf.FixedLenFeature(
                shape=shape, dtype=dtype, default_value=default_value)
        }

    parsed = tf.parse_example(input_value, features)
    output_tensor = parsed["x"] if dense else parsed["x"].values
    return [input_value], [output_tensor]
def _parse_function(example_proto):
    """Parse the "text" feature of a serialized example.

    Args:
        example_proto: serialized tf.Example string tensor.

    Returns:
        A tuple of the parsed sparse "text" tensor and the first entry of
        its dense shape.
    """
    feature_spec = {
        # "hash": tf.VarLenFeature(tf.string),
        "text": tf.VarLenFeature(tf.int64),
    }
    text = tf.parse_single_example(example_proto, feature_spec)["text"]
    return text, text.dense_shape[0]