def parse(self, serialized_example, is_training):
  """Parses one example.

  :param serialized_example: a serialized tf.Example string tensor.
  :param is_training: whether the example is parsed for training.
  :return: tensor_dict
  """
  decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
                                                  self.items_to_handlers)
  keys = decoder.list_items()
  tensors = decoder.decode(serialized_example, items=keys)
  tensor_dict = dict(zip(keys, tensors))
  tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
  tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
      tensor_dict[fields.InputDataFields.image])[:2]
  # Resize the decoded image to a fixed 300x300 (method=0 is bilinear).
  tensor_dict[fields.InputDataFields.image] = tf.image.resize_images(
      tensor_dict[fields.InputDataFields.image],
      tf.stack([300, 300]),
      method=0)
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    channels = tf.squeeze(channels, axis=3)
    channels = tf.transpose(channels, perm=[1, 2, 0])
    tensor_dict[fields.InputDataFields.image_additional_channels] = channels
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    is_crowd = fields.InputDataFields.groundtruth_is_crowd
    tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)

    def default_groundtruth_weights():
      # Default every groundtruth box to a weight of 1.0.
      shape = tf.shape(
          tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
      return tf.ones([shape], dtype=tf.float32)

    shape = tf.shape(
        tensor_dict[fields.InputDataFields.groundtruth_weights])[0]
    tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
        tf.greater(shape, 0),
        lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
        default_groundtruth_weights)
  return tensor_dict
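# Usage sketch, not part of the original code: assumes a hypothetical `parser`
# object exposing the parse() method above and TFRecord files matching
# `file_pattern`. It shows one way to map the per-example parser over a
# TF 1.x tf.data pipeline; batching would still need padded_batch because the
# groundtruth tensors are variable-length.
import tensorflow as tf


def make_dataset(parser, file_pattern):
  files = tf.data.Dataset.list_files(file_pattern, shuffle=True)
  records = tf.data.TFRecordDataset(files)
  # Each record is one serialized tf.Example; parse() returns a dict of
  # decoded tensors keyed by fields.InputDataFields.*.
  return records.map(lambda s: parser.parse(s, is_training=True),
                     num_parallel_calls=4)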
def input_pipeline(file_pattern, mode, capacity=64):
  keys_to_features = {
      "source": tf.VarLenFeature(tf.int64),
      "target": tf.VarLenFeature(tf.int64),
      "source_length": tf.FixedLenFeature([1], tf.int64),
      "target_length": tf.FixedLenFeature([1], tf.int64)
  }
  items_to_handlers = {
      "source": tfexample_decoder.Tensor("source"),
      "target": tfexample_decoder.Tensor("target"),
      "source_length": tfexample_decoder.Tensor("source_length"),
      "target_length": tfexample_decoder.Tensor("target_length")
  }

  # Now the non-trivial case construction.
  with tf.name_scope("examples_queue"):
    training = (mode == "train")
    # Read serialized examples using slim parallel_reader.
    num_epochs = None if training else 1
    data_files = parallel_reader.get_data_files(file_pattern)
    num_readers = min(4 if training else 1, len(data_files))
    _, examples = parallel_reader.parallel_read(
        [file_pattern],
        tf.TFRecordReader,
        num_epochs=num_epochs,
        shuffle=training,
        capacity=2 * capacity,
        min_after_dequeue=capacity,
        num_readers=num_readers)
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    decoded = decoder.decode(examples, items=list(items_to_handlers))
    examples = {}
    for (field, tensor) in zip(keys_to_features, decoded):
      examples[field] = tensor
    # We do not want int64s, as they are not supported on GPUs.
    return {k: tf.to_int32(v) for (k, v) in six.iteritems(examples)}
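# Usage sketch (not from the original source): the dict returned by
# input_pipeline() holds variable-length int32 "source"/"target" tensors, so a
# downstream trainer typically pad-batches them. Under the queue-runner model
# assumed by parallel_reader, tf.train.batch with dynamic_pad=True is one way
# to do that. The file pattern below is a hypothetical example.
features = input_pipeline("train-*.tfrecord", mode="train", capacity=64)
batched = tf.train.batch(
    features,
    batch_size=32,
    capacity=128,
    dynamic_pad=True)  # pad "source"/"target" to the longest in the batch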
def get_split(split_name, dataset_dir):
  """Gets the dataset object for DAVIS 2016.

  Note that the existence of data files is NOT checked here.

  Args:
    split_name: 'train', 'trainval' or 'val'.
    dataset_dir: The directory of the dataset sources.

  Returns:
    A dataset object.

  Raises:
    ValueError: if split_name is not recognized.
  """
  file_pattern = os.path.join(dataset_dir, '%s*' % split_name)
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s not found.' % split_name)

  # Parse tf.Examples.
  # 'flow/slice_index' specifies the flattened index in the 4-D bilateral
  # tensor for each pixel, according to its (dx, dy, x, y).
  keys_to_features = {
      'flow/height': tf.FixedLenFeature((), tf.int64, default_value=0),
      'flow/width': tf.FixedLenFeature((), tf.int64, default_value=0),
      'sequence/timestep': tf.FixedLenFeature((), tf.int64, default_value=0),
      'sequence/name': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/segmentation/object/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/segmentation/object/format': tf.FixedLenFeature((), tf.string),
      'flow_lattice/height': tf.FixedLenFeature((), tf.int64, default_value=0),
      'flow_lattice/width': tf.FixedLenFeature((), tf.int64, default_value=0),
      'flow_lattice/values': tf.VarLenFeature(tf.float32),
      'flow/slice_index': tf.VarLenFeature(tf.int64),  # See comment above.
      'prediction/objectness': tf.VarLenFeature(tf.float32),
  }

  # Handle each feature.
  items_to_handlers = {
      'height': tfexample_decoder.Tensor('flow/height'),
      'width': tfexample_decoder.Tensor('flow/width'),
      'flow_lattice': tfexample_decoder.Tensor('flow_lattice/values',
                                               default_value=0.),
      'lattice_height': tfexample_decoder.Tensor('flow_lattice/height'),
      'lattice_width': tfexample_decoder.Tensor('flow_lattice/width'),
      'sequence_name': tfexample_decoder.Tensor('sequence/name'),
      'timestep': tfexample_decoder.Tensor('sequence/timestep'),
      'object_labels': tfexample_decoder.Image(
          'image/segmentation/object/encoded',
          'image/segmentation/object/format',
          channels=1),
      'slice_index': tfexample_decoder.Tensor('flow/slice_index'),
      'objectness': tfexample_decoder.Tensor('prediction/objectness'),
  }

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES)
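# Usage sketch (not in the original source): a slim Dataset is normally read
# through a DatasetDataProvider, which wires up the TFRecordReader and the
# decoder declared above. `slim` is assumed to be tf.contrib.slim, and the
# dataset directory below is a placeholder.
davis = get_split('train', '/path/to/davis_tfrecords')
provider = slim.dataset_data_provider.DatasetDataProvider(
    davis, num_readers=2, shuffle=True)
# Items are requested by the names defined in items_to_handlers.
height, width, labels = provider.get(['height', 'width', 'object_labels'])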
def decode(self, tf_example_string_tensor):
  """Decodes serialized tensorflow example and returns a tensor dictionary.

  Args:
    tf_example_string_tensor: a string tensor holding a serialized tensorflow
      example proto.

  Returns:
    A dictionary of the following tensors.
    fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
      containing image.
    fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
      shape [2] containing shape of the image.
    fields.InputDataFields.source_id - string tensor containing original
      image id.
    fields.InputDataFields.key - string tensor with unique sha256 hash key.
    fields.InputDataFields.filename - string tensor with original dataset
      filename.
    fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
      [None, 4] containing box corners.
    fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
      [None] containing classes for the boxes.
    fields.InputDataFields.groundtruth_weights - 1D float32 tensor of shape
      [None] indicating the weights of groundtruth boxes.
    fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
      [None] containing object mask area in pixel squared.
    fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
      [None] indicating if the boxes enclose a crowd.

    Optional:
    fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
      shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
      is width; 3rd dim is the number of additional channels.
    fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
      [None] indicating if the boxes represent `difficult` instances.
    fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
      [None] indicating if the boxes represent `group_of` instances.
    fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of shape
      [None, num_keypoints, 2] containing keypoints, where the coordinates of
      the keypoints are ordered (y, x).
    fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool tensor
      of shape [None, num_keypoints] containing keypoint visibilities.
    fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
      shape [None, None, None] containing instance masks.
    fields.InputDataFields.groundtruth_image_classes - 1D int64 tensor of
      shape [None] containing classes for the boxes.
    fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
      [None * num_classes] containing flattened multiclass scores for
      groundtruth boxes.
    fields.InputDataFields.context_features - 1D float32 tensor of shape
      [context_feature_length * num_context_features]
    fields.InputDataFields.context_feature_length - int32 tensor specifying
      the length of each feature in context_features
  """
  serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
  decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
                                                  self.items_to_handlers)
  keys = decoder.list_items()
  tensors = decoder.decode(serialized_example, items=keys)
  tensor_dict = dict(zip(keys, tensors))
  is_crowd = fields.InputDataFields.groundtruth_is_crowd
  tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
  tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
  tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
      tensor_dict[fields.InputDataFields.image])[:2]

  if fields.InputDataFields.image_additional_channels in tensor_dict:
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    channels = tf.squeeze(channels, axis=3)
    channels = tf.transpose(channels, perm=[1, 2, 0])
    tensor_dict[fields.InputDataFields.image_additional_channels] = channels

  def default_groundtruth_weights():
    # Default every groundtruth box to a weight of 1.0.
    return tf.ones(
        [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
        dtype=tf.float32)

  tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
      tf.greater(
          tf.shape(
              tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
          0),
      lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
      default_groundtruth_weights)

  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    # Set all keypoints that are not labeled and not visible to NaN.
    gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
    gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
    visibilities_tiled = tf.tile(
        tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2])
    tensor_dict[gt_kpt_fld] = tf.where(
        visibilities_tiled, tensor_dict[gt_kpt_fld],
        np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

  return tensor_dict
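# Usage sketch (assumptions: `TfExampleDecoder` is the class that defines the
# decode() method above, and 'eval.record' is a placeholder TFRecord file).
# Decoding a single record this way is handy for inspecting what the decoder
# actually produces in TF 1.x.
decoder = TfExampleDecoder()
serialized = next(tf.python_io.tf_record_iterator('eval.record'))
tensor_dict = decoder.decode(tf.constant(serialized))
with tf.Session() as sess:
  out = sess.run(tensor_dict)
  print(out[fields.InputDataFields.image].shape)
  print(out[fields.InputDataFields.groundtruth_boxes])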
def get_dataset(dataset_name, split_name, dataset_dir):
  """Gets an instance of slim Dataset.

  Args:
    dataset_name: Dataset name.
    split_name: A train/val Split name.
    dataset_dir: The directory of the dataset sources.

  Returns:
    An instance of slim Dataset.

  Raises:
    ValueError: if the dataset_name or split_name is not recognized.
  """
  if dataset_name not in _DATASETS_INFORMATION:
    raise ValueError('The specified dataset is not supported yet.')

  splits_to_sizes = _DATASETS_INFORMATION[dataset_name].splits_to_sizes
  if split_name not in splits_to_sizes:
    raise ValueError('data split name %s not recognized' % split_name)

  # Prepare the variables for different datasets.
  num_classes = _DATASETS_INFORMATION[dataset_name].num_classes
  ignore_label = _DATASETS_INFORMATION[dataset_name].ignore_label

  file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Specify how the TF-Examples are decoded.
  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'image/height': tf.FixedLenFeature((), tf.int64, default_value=0),
      'image/width': tf.FixedLenFeature((), tf.int64, default_value=0),
      'image/segmentation/class/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/segmentation/class/format':
          tf.FixedLenFeature((), tf.string, default_value='png'),
  }
  items_to_handlers = {
      'image': tfexample_decoder.Image(
          image_key='image/encoded',
          format_key='image/format',
          channels=3),
      'image_name': tfexample_decoder.Tensor('image/filename'),
      'height': tfexample_decoder.Tensor('image/height'),
      'width': tfexample_decoder.Tensor('image/width'),
      'labels_class': tfexample_decoder.Image(
          image_key='image/segmentation/class/encoded',
          format_key='image/segmentation/class/format',
          channels=1),
  }

  decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                               items_to_handlers)

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=splits_to_sizes[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      ignore_label=ignore_label,
      num_classes=num_classes,
      name=dataset_name,
      multi_label=True)
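# Usage sketch (not from the original source): the returned slim Dataset is
# consumed through a DatasetDataProvider like any other slim dataset; here the
# decoded image and class labels are pulled out and batched. The dataset name
# 'pascal_voc_seg' and the directory are placeholders assumed to exist in
# _DATASETS_INFORMATION; a real pipeline would crop/augment rather than
# hard-code a resize.
seg_dataset = get_dataset('pascal_voc_seg', 'train', '/path/to/tfrecords')
provider = slim.dataset_data_provider.DatasetDataProvider(
    seg_dataset, num_readers=2, shuffle=True)
image, label = provider.get(['image', 'labels_class'])
# Give the tensors static shapes so tf.train.batch can infer them.
image = tf.image.resize_images(image, [513, 513])
label = tf.image.resize_nearest_neighbor(
    tf.expand_dims(label, 0), [513, 513])[0]
images, labels = tf.train.batch([image, label], batch_size=4, capacity=32)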