def testDecodeExampleWithFixLenTensorWithShape(self):
  np_array = np.array([[1, 2, 3], [4, 5, 6]])
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'labels': self._EncodedInt64Feature(np_array),
      }))
  serialized_example = example.SerializeToString()
  with self.test_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'labels':
            parsing_ops.FixedLenFeature(np_array.shape, dtype=dtypes.int64),
    }
    items_to_handlers = {
        'labels': tfexample_decoder.Tensor('labels', shape=np_array.shape),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_labels] = decoder.decode(serialized_example, ['labels'])
    labels = tf_labels.eval()
    self.assertAllEqual(labels, np_array)

def testDecodeExampleWithLookup(self):
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image/object/class/text':
              self._BytesFeature(np.array(['cat', 'dog', 'guinea pig'])),
      }))
  serialized_example = example.SerializeToString()
  # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
  table = lookup_ops.index_table_from_tensor(
      constant_op.constant(['dog', 'guinea pig', 'cat']))
  with self.cached_session() as sess:
    sess.run(lookup_ops.tables_initializer())
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
    }
    items_to_handlers = {
        'labels':
            tfexample_decoder.LookupTensor('image/object/class/text', table),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    obtained_class_ids = decoder.decode(serialized_example)[0].eval()

  self.assertAllClose([2, 0, 1], obtained_class_ids)

def testDecodeExampleWithSparseTensorToDense(self):
  np_indices = np.array([1, 2, 5])
  np_values = np.array([0.1, 0.2, 0.6]).astype('f')
  np_shape = np.array([6])
  np_dense = np.array([0.0, 0.1, 0.2, 0.0, 0.0, 0.6]).astype('f')
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'indices': self._EncodedInt64Feature(np_indices),
          'values': self._EncodedFloatFeature(np_values),
      }))
  serialized_example = example.SerializeToString()
  with self.cached_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'indices': parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'values': parsing_ops.VarLenFeature(dtype=dtypes.float32),
    }
    items_to_handlers = {
        'labels':
            tfexample_decoder.SparseTensor(shape=np_shape, densify=True),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_labels] = decoder.decode(serialized_example, ['labels'])
    labels = tf_labels.eval()
    self.assertAllClose(labels, np_dense)

def testDecodeExampleWithTensor(self):
  tensor_shape = (2, 3, 1)
  np_array = np.random.rand(2, 3, 1)
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image/depth_map': self._EncodedFloatFeature(np_array),
      }))
  serialized_example = example.SerializeToString()
  with self.cached_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'image/depth_map':
            parsing_ops.FixedLenFeature(
                tensor_shape,
                dtypes.float32,
                default_value=array_ops.zeros(tensor_shape))
    }
    items_to_handlers = {'depth': tfexample_decoder.Tensor('image/depth_map')}
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_depth] = decoder.decode(serialized_example, ['depth'])
    depth = tf_depth.eval()
    self.assertAllClose(np_array, depth)

def testDecodeExampleWithStringTensor(self):
  tensor_shape = (2, 3, 1)
  np_array = np.array([[['ab'], ['cd'], ['ef']],
                       [['ghi'], ['jkl'], ['mnop']]])
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'labels': self._BytesFeature(np_array),
      }))
  serialized_example = example.SerializeToString()
  with self.cached_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'labels':
            parsing_ops.FixedLenFeature(
                tensor_shape,
                dtypes.string,
                default_value=constant_op.constant(
                    '', shape=tensor_shape, dtype=dtypes.string))
    }
    items_to_handlers = {
        'labels': tfexample_decoder.Tensor('labels'),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_labels] = decoder.decode(serialized_example, ['labels'])
    labels = tf_labels.eval()
    labels = labels.astype(np_array.dtype)
    self.assertTrue(np.array_equal(np_array, labels))

def testDecodeExampleWithSparseTensorWithKeyShape(self):
  np_indices = np.array([[1], [2], [5]])
  np_values = np.array([0.1, 0.2, 0.6]).astype('f')
  np_shape = np.array([6])
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'indices': self._EncodedInt64Feature(np_indices),
          'values': self._EncodedFloatFeature(np_values),
          'shape': self._EncodedInt64Feature(np_shape),
      }))
  serialized_example = example.SerializeToString()
  with self.test_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'indices': parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'values': parsing_ops.VarLenFeature(dtype=dtypes.float32),
        'shape': parsing_ops.VarLenFeature(dtype=dtypes.int64),
    }
    items_to_handlers = {
        'labels': tfexample_decoder.SparseTensor(shape_key='shape'),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_labels] = decoder.decode(serialized_example, ['labels'])
    labels = tf_labels.eval()
    self.assertAllEqual(labels.indices, np_indices)
    self.assertAllEqual(labels.values, np_values)
    self.assertAllEqual(labels.dense_shape, np_shape)

def DecodeExample(self, serialized_example, item_handler, image_format):
  """Decodes the given serialized example with the specified item handler.

  Args:
    serialized_example: a serialized TF example string.
    item_handler: the item handler used to decode the image.
    image_format: the image format being decoded.

  Returns:
    the decoded image found in the serialized Example.
  """
  serialized_example = array_ops.reshape(serialized_example, shape=[])
  decoder = tfexample_decoder.TFExampleDecoder(
      keys_to_features={
          'image/encoded':
              parsing_ops.FixedLenFeature(
                  (), dtypes.string, default_value=''),
          'image/format':
              parsing_ops.FixedLenFeature(
                  (), dtypes.string, default_value=image_format),
      },
      items_to_handlers={'image': item_handler})
  [tf_image] = decoder.decode(serialized_example, ['image'])
  return tf_image

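# A minimal usage sketch (not part of the original suite; assumes the
# GenerateImage helper defined alongside these tests): decode a PNG-encoded
# example through the stock Image item handler via DecodeExample.
def exampleDecodeWithImageHandler(self):
  image, serialized_example = self.GenerateImage(
      image_format='png', image_shape=(2, 3, 3))
  with self.cached_session():
    tf_image = self.DecodeExample(
        serialized_example,
        tfexample_decoder.Image(
            image_key='image/encoded', format_key='image/format', channels=3),
        image_format='png')
    # PNG decoding is lossless, so the round trip is exact.
    self.assertAllEqual(tf_image.eval(), image)
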
def get_split(split_name='train',
              dataset_dir=None,
              num_classes_per_attribute=None):
  """Gets a dataset tuple with instructions for reading 2D shapes data.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    num_classes_per_attribute: The number of labels for the classification
      problem corresponding to each attribute. For example, if the first
      attribute is "shape" and there are three possible shapes, then provide
      a value of 3 in the first index, and so on.

  Returns:
    A `Dataset` namedtuple.
    metadata: A dictionary with some metadata about the dataset we just
      constructed.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if num_classes_per_attribute is None:
    num_classes_per_attribute = _NUM_CLASSES_PER_ATTRIBUTE

  if dataset_dir is None:
    dataset_dir = _DATASET_DIR

  file_pattern = os.path.join(dataset_dir,
                              _FILE_PATTERN % (_SPLIT_TYPE, split_name))

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
      'labels': tf.FixedLenFeature([len(num_classes_per_attribute)],
                                   tf.int64),
      'latents': tf.FixedLenFeature([_NUM_LATENTS], tf.float32),
  }

  items_to_handlers = {
      'image': tfexample_decoder.Image(shape=[64, 64, 3]),
      'labels': tfexample_decoder.Tensor('labels'),
      'latents': tfexample_decoder.Tensor('latents'),
  }

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  metadata = {
      'num_classes_per_attribute': num_classes_per_attribute,
      'split_type': _SPLIT_TYPE
  }

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS), metadata

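# A hedged consumption sketch (assumption, not part of the original module):
# reading the decoded tensors from this split through slim's
# DatasetDataProvider, mirroring the provider usage elsewhere in this section.
def _example_read_shapes_split():
  shapes_split, shapes_metadata = get_split(split_name='train')
  provider = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
      shapes_split, shuffle=True)
  # `labels` holds one class id per attribute; the class counts live in
  # shapes_metadata['num_classes_per_attribute'].
  image, labels, latents = provider.get(['image', 'labels', 'latents'])
  return image, labels, latents, shapes_metadata
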
def make_data_provider(self, **kwargs):
  splitter_source = split_tokens_decoder.SplitTokensDecoder(
      tokens_feature_name="source_tokens",
      length_feature_name="source_len",
      append_token="SEQUENCE_END",
      delimiter=self.params["source_delimiter"])

  splitter_target = split_tokens_decoder.SplitTokensDecoder(
      tokens_feature_name="target_tokens",
      length_feature_name="target_len",
      prepend_token="SEQUENCE_START",
      append_token="SEQUENCE_END",
      delimiter=self.params["target_delimiter"])

  keys_to_features = {
      self.params["source_field"]: tf.FixedLenFeature((), tf.string),
      self.params["target_field"]: tf.FixedLenFeature(
          (), tf.string, default_value="")
  }

  items_to_handlers = {}
  items_to_handlers["source_tokens"] = tfexample_decoder.ItemHandlerCallback(
      keys=[self.params["source_field"]],
      func=lambda keys_to_tensors: splitter_source.decode(
          keys_to_tensors[self.params["source_field"]],
          ["source_tokens"])[0])
  items_to_handlers["source_len"] = tfexample_decoder.ItemHandlerCallback(
      keys=[self.params["source_field"]],
      func=lambda keys_to_tensors: splitter_source.decode(
          keys_to_tensors[self.params["source_field"]],
          ["source_len"])[0])
  items_to_handlers["target_tokens"] = tfexample_decoder.ItemHandlerCallback(
      keys=[self.params["target_field"]],
      func=lambda keys_to_tensors: splitter_target.decode(
          keys_to_tensors[self.params["target_field"]],
          ["target_tokens"])[0])
  items_to_handlers["target_len"] = tfexample_decoder.ItemHandlerCallback(
      keys=[self.params["target_field"]],
      func=lambda keys_to_tensors: splitter_target.decode(
          keys_to_tensors[self.params["target_field"]],
          ["target_len"])[0])

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  dataset = tf.contrib.slim.dataset.Dataset(
      data_sources=self.params["files"],
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=None,
      items_to_descriptions={})

  return tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
      dataset=dataset,
      shuffle=self.params["shuffle"],
      num_epochs=self.params["num_epochs"],
      **kwargs)

def testDecodeImageWithItemHandlerCallback(self):
  image_shape = (2, 3, 3)
  for image_encoding in ['jpeg', 'png']:
    image, serialized_example = self.GenerateImage(
        image_format=image_encoding, image_shape=image_shape)

    with self.test_session():

      def ConditionalDecoding(keys_to_tensors):
        """See base class."""
        image_buffer = keys_to_tensors['image/encoded']
        image_format = keys_to_tensors['image/format']

        def DecodePng():
          return image_ops.decode_png(image_buffer, 3)

        def DecodeJpg():
          return image_ops.decode_jpeg(image_buffer, 3)

        image = control_flow_ops.case(
            {
                math_ops.equal(image_format, 'png'): DecodePng,
            },
            default=DecodeJpg,
            exclusive=True)
        image = array_ops.reshape(image, image_shape)
        return image

      keys_to_features = {
          'image/encoded':
              parsing_ops.FixedLenFeature(
                  (), dtypes.string, default_value=''),
          'image/format':
              parsing_ops.FixedLenFeature(
                  (), dtypes.string, default_value='jpeg')
      }

      items_to_handlers = {
          'image':
              tfexample_decoder.ItemHandlerCallback(
                  ['image/encoded', 'image/format'], ConditionalDecoding)
      }

      decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                   items_to_handlers)
      [tf_image] = decoder.decode(serialized_example, ['image'])
      decoded_image = tf_image.eval()
      if image_encoding == 'jpeg':
        # For jenkins:
        image = image.astype(np.float32)
        decoded_image = decoded_image.astype(np.float32)
        self.assertAllClose(image, decoded_image, rtol=.5, atol=1.001)
      else:
        self.assertAllClose(image, decoded_image, atol=0)

def get_split(split_name, dataset_dir=None):
  """Gets a dataset tuple with instructions for reading cifar100.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.

  Returns:
    A `Dataset` namedtuple. Image tensors are integers in [0, 255].

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  file_pattern = os.path.join(dataset_dir, _FILE_PATTERN % split_name)

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/class/label': tf.FixedLenFeature(
          [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
      'image/class/fine_label': tf.FixedLenFeature(
          [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
  }

  if split_name == 'train':
    items_to_handlers = {
        'image': tfexample_decoder.Image(shape=[32, 32, 3]),
        'label': tfexample_decoder.Tensor('image/class/label'),
    }
  else:
    items_to_handlers = {
        'image': tfexample_decoder.Image(shape=[32, 32, 3]),
        'label': tfexample_decoder.Tensor('image/class/fine_label'),
    }

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      num_classes=_NUM_CLASSES,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS)

def testDecodeExampleWithBoundingBoxDense(self):
  num_bboxes = 10
  np_ymin = np.random.rand(num_bboxes, 1)
  np_xmin = np.random.rand(num_bboxes, 1)
  np_ymax = np.random.rand(num_bboxes, 1)
  np_xmax = np.random.rand(num_bboxes, 1)
  np_bboxes = np.hstack([np_ymin, np_xmin, np_ymax, np_xmax])

  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image/object/bbox/ymin': self._EncodedFloatFeature(np_ymin),
          'image/object/bbox/xmin': self._EncodedFloatFeature(np_xmin),
          'image/object/bbox/ymax': self._EncodedFloatFeature(np_ymax),
          'image/object/bbox/xmax': self._EncodedFloatFeature(np_xmax),
      }))
  serialized_example = example.SerializeToString()
  with self.test_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'image/object/bbox/ymin':
            parsing_ops.FixedLenSequenceFeature(
                [], dtypes.float32, allow_missing=True),
        'image/object/bbox/xmin':
            parsing_ops.FixedLenSequenceFeature(
                [], dtypes.float32, allow_missing=True),
        'image/object/bbox/ymax':
            parsing_ops.FixedLenSequenceFeature(
                [], dtypes.float32, allow_missing=True),
        'image/object/bbox/xmax':
            parsing_ops.FixedLenSequenceFeature(
                [], dtypes.float32, allow_missing=True),
    }
    items_to_handlers = {
        'object/bbox':
            tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                          'image/object/bbox/'),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_bboxes] = decoder.decode(serialized_example, ['object/bbox'])
    bboxes = tf_bboxes.eval()
    self.assertAllClose(np_bboxes, bboxes)

def testDecodeExampleWithBackupHandlerLookup(self):
  example1 = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image/object/class/text':
              self._BytesFeature(np.array(['cat', 'dog', 'guinea pig'])),
          'image/object/class/label':
              self._EncodedInt64Feature(np.array([42, 10, 900]))
      }))
  example2 = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image/object/class/text':
              self._BytesFeature(np.array(['cat', 'dog', 'guinea pig'])),
      }))
  example3 = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image/object/class/label':
              self._EncodedInt64Feature(np.array([42, 10, 901]))
      }))
  # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
  table = lookup_ops.index_table_from_tensor(
      constant_op.constant(['dog', 'guinea pig', 'cat']))
  keys_to_features = {
      'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
      'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
  }
  backup_handler = tfexample_decoder.BackupHandler(
      handler=tfexample_decoder.Tensor('image/object/class/label'),
      backup=tfexample_decoder.LookupTensor('image/object/class/text',
                                            table))
  items_to_handlers = {
      'labels': backup_handler,
  }
  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)
  obtained_class_ids_each_example = []
  with self.test_session() as sess:
    sess.run(lookup_ops.tables_initializer())
    for example in [example1, example2, example3]:
      serialized_example = array_ops.reshape(
          example.SerializeToString(), shape=[])
      obtained_class_ids_each_example.append(
          decoder.decode(serialized_example)[0].eval())

  self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
  self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
  self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])

def testDecodeExampleMultiShapeKeyTensor(self):
  np_image = np.random.rand(2, 3, 1).astype('f')
  np_labels = np.array([[[1], [2], [3]], [[4], [5], [6]]])
  height, width, depth = np_labels.shape

  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image': self._EncodedFloatFeature(np_image),
          'image/shape': self._EncodedInt64Feature(np.array(np_image.shape)),
          'labels': self._EncodedInt64Feature(np_labels),
          'labels/height': self._EncodedInt64Feature(np.array([height])),
          'labels/width': self._EncodedInt64Feature(np.array([width])),
          'labels/depth': self._EncodedInt64Feature(np.array([depth])),
      }))
  serialized_example = example.SerializeToString()

  with self.cached_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'image': parsing_ops.VarLenFeature(dtype=dtypes.float32),
        'image/shape': parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'labels': parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'labels/height': parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'labels/width': parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'labels/depth': parsing_ops.VarLenFeature(dtype=dtypes.int64),
    }
    items_to_handlers = {
        'image':
            tfexample_decoder.Tensor('image', shape_keys='image/shape'),
        'labels':
            tfexample_decoder.Tensor(
                'labels',
                shape_keys=['labels/height', 'labels/width', 'labels/depth']),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_image, tf_labels] = decoder.decode(serialized_example,
                                           ['image', 'labels'])
    self.assertAllEqual(tf_image.eval(), np_image)
    self.assertAllEqual(tf_labels.eval(), np_labels)

def testDecodeExampleWithRepeatedImages(self):
  image_shape = (2, 3, 3)
  image_format = 'png'
  image, _ = self.GenerateImage(
      image_format=image_format, image_shape=image_shape)
  tf_encoded = self._Encoder(image, image_format)
  with self.test_session():
    tf_string = tf_encoded.eval()

  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image/encoded':
              feature_pb2.Feature(
                  bytes_list=feature_pb2.BytesList(
                      value=[tf_string, tf_string])),
          'image/format':
              self._StringFeature(image_format),
      }))
  serialized_example = example.SerializeToString()

  with self.test_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    decoder = tfexample_decoder.TFExampleDecoder(
        keys_to_features={
            'image/encoded':
                parsing_ops.FixedLenFeature((2,), dtypes.string),
            'image/format':
                parsing_ops.FixedLenFeature(
                    (), dtypes.string, default_value=image_format),
        },
        items_to_handlers={'image': tfexample_decoder.Image(repeated=True)})
    [tf_image] = decoder.decode(serialized_example, ['image'])
    output_image = tf_image.eval()

    self.assertEqual(output_image.shape, (2, 2, 3, 3))
    self.assertAllEqual(np.squeeze(output_image[0, :, :, :]), image)
    self.assertAllEqual(np.squeeze(output_image[1, :, :, :]), image)

def testDecodeExampleWithItemHandlerCallback(self):
  np.random.seed(0)
  tensor_shape = (2, 3, 1)
  np_array = np.random.rand(2, 3, 1)
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'image/depth_map': self._EncodedFloatFeature(np_array),
      }))
  serialized_example = example.SerializeToString()

  with self.test_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'image/depth_map':
            parsing_ops.FixedLenFeature(
                tensor_shape,
                dtypes.float32,
                default_value=array_ops.zeros(tensor_shape))
    }

    def HandleDepth(keys_to_tensors):
      depth = list(keys_to_tensors.values())[0]
      depth += 1
      return depth

    items_to_handlers = {
        'depth':
            tfexample_decoder.ItemHandlerCallback('image/depth_map',
                                                  HandleDepth)
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_depth] = decoder.decode(serialized_example, ['depth'])
    depth = tf_depth.eval()

  self.assertAllClose(np_array, depth - 1)

def testDecodeExampleWithInt64Tensor(self):
  np_array = np.random.randint(1, 10, size=(2, 3, 1))
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'array': self._EncodedInt64Feature(np_array),
      }))
  serialized_example = example.SerializeToString()

  with self.cached_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'array': parsing_ops.FixedLenFeature(np_array.shape, dtypes.int64)
    }
    items_to_handlers = {
        'array': tfexample_decoder.Tensor('array'),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_array] = decoder.decode(serialized_example, ['array'])
    self.assertAllEqual(tf_array.eval(), np_array)

def _get_split(file_pattern, num_samples, num_views, image_size, vox_size):
  """Get dataset.Dataset for the given dataset file pattern and properties."""

  # A dictionary from TF-Example keys to tf.FixedLenFeature instances.
  keys_to_features = {
      'image': tf.FixedLenFeature(
          shape=[num_views, image_size, image_size, 3],
          dtype=tf.float32,
          default_value=None),
      'mask': tf.FixedLenFeature(
          shape=[num_views, image_size, image_size, 1],
          dtype=tf.float32,
          default_value=None),
      'vox': tf.FixedLenFeature(
          shape=[vox_size, vox_size, vox_size, 1],
          dtype=tf.float32,
          default_value=None),
  }

  items_to_handler = {
      'image': tfexample_decoder.Tensor(
          'image', shape=[num_views, image_size, image_size, 3]),
      'mask': tfexample_decoder.Tensor(
          'mask', shape=[num_views, image_size, image_size, 1]),
      'vox': tfexample_decoder.Tensor(
          'vox', shape=[vox_size, vox_size, vox_size, 1])
  }

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handler)

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=num_samples,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS)

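# A hedged call sketch (assumption; the file pattern and sample count below
# are illustrative placeholders, not real data): each example carries
# `num_views` renderings, per-view masks, and a voxel grid of side `vox_size`,
# all decoded as dense float tensors.
def _example_get_multiview_split():
  return _get_split(
      file_pattern='/path/to/train-*.tfrecord',  # hypothetical pattern
      num_samples=10000,  # hypothetical size
      num_views=24,
      image_size=64,
      vox_size=32)
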
def testDecodeExampleWithVarLenTensor(self):
  np_array = np.array([[[1], [2], [3]], [[4], [5], [6]]])
  example = example_pb2.Example(
      features=feature_pb2.Features(feature={
          'labels': self._EncodedInt64Feature(np_array),
      }))
  serialized_example = example.SerializeToString()

  with self.cached_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'labels': parsing_ops.VarLenFeature(dtype=dtypes.int64),
    }
    items_to_handlers = {
        'labels': tfexample_decoder.Tensor('labels'),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_labels] = decoder.decode(serialized_example, ['labels'])
    labels = tf_labels.eval()
    self.assertAllEqual(labels, np_array.flatten())

def get_split(split_name='train',
              split_type='iid',
              dataset_dir=None,
              image_length=64,
              num_classes_per_attribute=None):
  """Gets a dataset tuple with instructions for reading CelebA data.

  Args:
    split_name: A train/test split name.
    split_type: str, type of split being loaded: "iid" or "comp".
    dataset_dir: The base directory of the dataset sources.
    image_length: The side length, in pixels, of the decoded square images.
    num_classes_per_attribute: The number of labels for the classification
      problem corresponding to each attribute. For example, if the first
      attribute is "shape" and there are three possible shapes, then provide
      a value of 3 in the first index, and so on.

  Returns:
    A `Dataset` namedtuple.
    metadata: A dictionary with some metadata about the dataset we just
      constructed.

  Raises:
    ValueError: if `split_name` is not a valid train/test split, or if
      `split_type` is not "iid".
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if split_type != 'iid':
    raise ValueError('Only IID split available for CelebA.')

  if num_classes_per_attribute is None:
    num_classes_per_attribute = _NUM_CLASSES_PER_ATTRIBUTE

  if not dataset_dir:
    dataset_dir = _DATASET_DIR

  # Load attribute label map file.
  label_map_json = os.path.join(dataset_dir, 'attribute_label_map.json')

  file_pattern = os.path.join(dataset_dir, _FILE_PATTERN % split_name)
  tf.logging.info('Loading from %s file.' % (file_pattern))

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
      'image/labels': tf.FixedLenFeature([len(num_classes_per_attribute)],
                                         tf.int64),
  }
  # TODO(vrama): See
  # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py#L270
  # For where changes would need to be made to preprocess the images which
  # get loaded.
  items_to_handlers = {
      'image': ImageDecodeProcess(shape=[218, 178, 3],
                                  image_length=image_length),
      'labels': tfexample_decoder.Tensor('image/labels'),
  }

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  metadata = {
      'num_classes_per_attribute': num_classes_per_attribute,
      'split_type': _SPLIT_TYPE,
      'label_map_json': label_map_json,
  }

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS), metadata

def __init__(self, data_sources, has_mask, labels, context_window_size,
             observation_codes, intervention_codes):
  """Creates a dataset for clinical time series dense feature lab data.

  Args:
    data_sources: A list of files/patterns for the slim Dataset.
    has_mask: Whether the dataset has obs_mask and true_length_hr feature.
    labels: A list of (label_name, has_time_of_event) tuples, corresponding
      to labels in ModelConfig; has_time_of_event marks labels that carry a
      survival-analysis `.time_of_event` companion feature.
    context_window_size: Size of the context window, i.e. the length of the
      time series.
    observation_codes: A list of features corresponding to the observation
      time series data.
    intervention_codes: A list of features corresponding to the intervention
      time series data.

  Returns:
    A slim dataset with proper reader and decoders.
  """
  keys_to_features = {}
  items_to_handlers = {}
  keys_to_features['context_window_size'] = tf.FixedLenFeature(
      [], dtype=tf.int64, default_value=0)
  items_to_handlers['context_window_size'] = tfexample_decoder.Tensor(
      'context_window_size', default_value=0)
  keys_to_features['context_window_start_time_sec'] = tf.FixedLenFeature(
      [], dtype=tf.int64, default_value=0)
  items_to_handlers[
      'context_window_start_time_sec'] = tfexample_decoder.Tensor(
          'context_window_start_time_sec', default_value=0)
  keys_to_features['trigger_time_sec'] = tf.FixedLenFeature(
      [], dtype=tf.int64, default_value=0)
  items_to_handlers['trigger_time_sec'] = tfexample_decoder.Tensor(
      'trigger_time_sec', default_value=0)
  if has_mask:
    keys_to_features['true_length_hr'] = tf.FixedLenFeature(
        [], dtype=tf.int64, default_value=0)
    items_to_handlers['true_length_hr'] = tfexample_decoder.Tensor(
        'true_length_hr', default_value=0)

  tf.logging.info('Labels are:')
  for label in labels:
    tf.logging.info(label)
    keys_to_features[label[0]] = tf.FixedLenFeature(
        [], dtype=tf.int64, default_value=-1)
    items_to_handlers[label[0]] = tfexample_decoder.Tensor(label[0])
    # This label is for a survival analysis event.
    if label[1]:
      tf.logging.info(label[0] + '.time_of_event')
      keys_to_features[label[0] + '.time_of_event'] = tf.FixedLenFeature(
          [], dtype=tf.int64, default_value=0)
      items_to_handlers[label[0] +
                        '.time_of_event'] = tfexample_decoder.Tensor(
                            label[0] + '.time_of_event')

  tf.logging.info('Features are:')
  for observation in observation_codes:
    tf.logging.info(observation)
    keys_to_features[observation] = tf.FixedLenFeature(
        shape=[context_window_size], dtype=tf.float32)
    items_to_handlers[observation] = tfexample_decoder.Tensor(
        observation, default_value=-1)
    if has_mask:
      observation = strip_raw_feature(observation)
      keys_to_features[observation + '_mask'] = tf.FixedLenFeature(
          shape=[context_window_size], dtype=tf.float32)
      items_to_handlers[observation + '_mask'] = tfexample_decoder.Tensor(
          observation + '_mask', default_value=0)

  for intervention in intervention_codes:
    tf.logging.info(intervention)
    keys_to_features[intervention] = tf.FixedLenFeature(
        shape=[context_window_size], dtype=tf.float32)
    items_to_handlers[intervention] = tfexample_decoder.Tensor(
        intervention, default_value=-1)
    if has_mask:
      intervention = strip_raw_feature(intervention)
      keys_to_features[intervention + '_mask'] = tf.FixedLenFeature(
          shape=[context_window_size], dtype=tf.float32)
      items_to_handlers[intervention + '_mask'] = tfexample_decoder.Tensor(
          intervention + '_mask', default_value=0)

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)
  super(ClinicalSeriesDataset, self).__init__(
      data_sources=data_sources,
      reader=tf.compat.v1.TFRecordReader,
      decoder=decoder,
      num_samples=None,
      items_to_descriptions={})

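# A hedged construction sketch (assumption; the class name comes from the
# super() call above, and every path, label, and feature code below is
# illustrative). Each label pairs a name with a flag saying whether a
# survival-analysis `.time_of_event` companion feature exists.
def _example_build_clinical_dataset():
  return ClinicalSeriesDataset(
      data_sources=['/path/to/clinical-*.tfrecord'],  # hypothetical sources
      has_mask=True,
      labels=[('mortality', True), ('readmission', False)],
      context_window_size=48,
      observation_codes=['heart_rate_raw'],
      intervention_codes=['vasopressor_raw'])
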
def _create_tfrecord_dataset(config):
  """Create tfrecord dataset for DatasetDataProvider.

  Args:
    config: an instance of AdsExample proto.

  Returns:
    dataset: a slim.data.dataset.Dataset instance.
  """

  def _handle_frame_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors, 'video/features',
                                  config.feature_dims)

  def _handle_climax_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors, 'video/climax_features',
                                  1)

  def _handle_climax_predictions_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors,
                                  'video/climax_predictions', 1)

  def _handle_common_object_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors,
                                  'video/common_object_features',
                                  config.common_object_feature_dims)

  def _handle_place_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors, 'video/place_features',
                                  config.place_feature_dims)

  def _handle_emotic_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors, 'video/emotic_features',
                                  config.emotic_feature_dims)

  def _handle_affectnet_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors,
                                  'video/affectnet_features',
                                  config.affectnet_feature_dims)

  def _handle_shot_boundary_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors,
                                  'video/shot_boundary_features',
                                  config.shot_boundary_feature_dims)

  def _handle_optical_flow_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors,
                                  'video/optical_flow_features',
                                  config.optical_flow_feature_dims)

  def _handle_audio_features_wrapper(keys_to_tensors):
    return _handle_frame_features(keys_to_tensors, 'video/audio_features',
                                  config.audio_feature_dims)

  item_handler_frame_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/features'],
      func=_handle_frame_features_wrapper)
  item_handler_climax_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/climax_features'],
      func=_handle_climax_features_wrapper)
  item_handler_climax_predictions = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/climax_predictions'],
      func=_handle_climax_predictions_wrapper)
  item_handler_common_object_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/common_object_features'],
      func=_handle_common_object_features_wrapper)
  item_handler_place_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/place_features'],
      func=_handle_place_features_wrapper)
  item_handler_emotic_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/emotic_features'],
      func=_handle_emotic_features_wrapper)
  item_handler_affectnet_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/affectnet_features'],
      func=_handle_affectnet_features_wrapper)
  item_handler_shot_boundary_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/shot_boundary_features'],
      func=_handle_shot_boundary_features_wrapper)
  item_handler_optical_flow_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/optical_flow_features'],
      func=_handle_optical_flow_features_wrapper)
  item_handler_audio_features = tfexample_decoder.ItemHandlerCallback(
      keys=['video/n_frames', 'video/audio_features'],
      func=_handle_audio_features_wrapper)

  keys_to_features = {
      'video/source_id':
          tf.FixedLenFeature(shape=(), dtype=tf.string, default_value=''),
      'video/n_frames':
          tf.FixedLenFeature((), tf.int64, default_value=0),
      'video/features': tf.VarLenFeature(tf.float32),
      'video/climax_features': tf.VarLenFeature(tf.float32),
      'video/climax_predictions': tf.VarLenFeature(tf.float32),
      'video/common_object_features': tf.VarLenFeature(tf.float32),
      'video/place_features': tf.VarLenFeature(tf.float32),
      'video/emotic_features': tf.VarLenFeature(tf.float32),
      'video/affectnet_features': tf.VarLenFeature(tf.float32),
      'video/shot_boundary_features': tf.VarLenFeature(tf.float32),
      'video/optical_flow_features': tf.VarLenFeature(tf.float32),
      'video/audio_features': tf.VarLenFeature(tf.float32),
      'anno/topic': tf.FixedLenFeature((), tf.int64),
      'anno/sentiment': tf.FixedLenFeature((), tf.int64),
      'anno/sentiment_list':
          tf.FixedLenFeature([config.sentiment_num_classes], tf.float32),
  }

  items_to_handlers = {
      'video_id': tfexample_decoder.Tensor('video/source_id'),
      'n_frames': tfexample_decoder.Tensor('video/n_frames'),
      'topic': tfexample_decoder.Tensor('anno/topic'),
      'sentiment': tfexample_decoder.Tensor('anno/sentiment'),
      'frame_features': item_handler_frame_features,
      'climax_features': item_handler_climax_features,
      'climax_predictions': item_handler_climax_predictions,
      'common_object_features': item_handler_common_object_features,
      'place_features': item_handler_place_features,
      'emotic_features': item_handler_emotic_features,
      'affectnet_features': item_handler_affectnet_features,
      'shot_boundary_features': item_handler_shot_boundary_features,
      'optical_flow_features': item_handler_optical_flow_features,
      'audio_features': item_handler_audio_features,
      'sentiment_list': tfexample_decoder.Tensor('anno/sentiment_list'),
  }

  # if config.use_sent_list:
  #   keys_to_features['anno/sentiment_list'] = tf.FixedLenFeature(
  #       [config.sentiment_num_classes], tf.float32)
  #   items_to_handlers['sentiment_list'] = tfexample_decoder.Tensor(
  #       'anno/sentiment_list')

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  input_paths = list(config.input_path)

  return dataset.Dataset(
      data_sources=input_paths,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=config.num_examples,
      items_to_descriptions=None)
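

# A hedged usage sketch (assumption, not part of the original module): the
# returned Dataset is meant to be read through slim's DatasetDataProvider,
# as its docstring says; the items requested below come from
# items_to_handlers above.
def _example_read_ads_dataset(config):
  ads_dataset = _create_tfrecord_dataset(config)
  provider = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
      ads_dataset, shuffle=True)
  return provider.get(['n_frames', 'frame_features', 'sentiment'])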