def testDecodeJpegImage(self):
  """Decodes a 'jpeg'-tagged example and checks image, spatial shape and id."""
  input_fields = fields.InputDataFields
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  # Round-trip through the codec so the expectation matches what the decoder
  # will actually produce from the encoded bytes.
  expected_image = self._DecodeImage(image_bytes)

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/source_id': dataset_util.bytes_feature(six.b('image_id')),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder()
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  # Static shapes are checked on the graph tensors, before evaluation.
  static_image_shape = decoded[input_fields.image].get_shape().as_list()
  self.assertAllEqual(static_image_shape, [None, None, 3])
  static_spatial_shape = (
      decoded[input_fields.original_image_spatial_shape]
      .get_shape().as_list())
  self.assertAllEqual(static_spatial_shape, [2])

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  self.assertAllEqual(expected_image, outputs[input_fields.image])
  self.assertAllEqual(
      [4, 5], outputs[input_fields.original_image_spatial_shape])
  self.assertEqual(six.b('image_id'), outputs[input_fields.source_id])
def testDecodeAdditionalChannels(self):
  """Checks that two extra encoded channels come back joined on axis 2."""
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)

  extra_pixels = np.random.randint(256, size=(4, 5, 1)).astype(np.uint8)
  extra_bytes = self._EncodeImage(extra_pixels)
  expected_channel = self._DecodeImage(extra_bytes)

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      # The same encoded channel is stored twice.
      'image/additional_channels/encoded':
          dataset_util.bytes_list_feature([extra_bytes, extra_bytes]),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/source_id': dataset_util.bytes_feature(six.b('image_id')),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder(num_additional_channels=2)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  expected = np.concatenate([expected_channel, expected_channel], axis=2)
  self.assertAllEqual(
      expected, outputs[fields.InputDataFields.image_additional_channels])
def testDecodeObjectWeight(self):
  """Checks that per-object weights stored in the example are decoded."""
  weights_key = fields.InputDataFields.groundtruth_weights
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  expected_weights = [0.75, 1.0]

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/weight':
          dataset_util.float_list_feature(expected_weights),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder()
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  # The number of objects is not known statically.
  static_shape = decoded[weights_key].get_shape().as_list()
  self.assertAllEqual(static_shape, [None])

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  self.assertAllEqual(expected_weights, outputs[weights_key])
def create_tf_record(self):
  """Writes a one-example TFRecord file into the test's temp directory.

  The example holds a random 4x5 RGB image encoded as JPEG, its height and
  width, a single box spanning [0, 1] on both axes with class label 2, and a
  flat mask of 4 * 5 ones.

  Returns:
    Path of the TFRecord file that was written.
  """
  path = os.path.join(self.get_temp_dir(), 'tfrecord')

  image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
  flat_mask = (4 * 5) * [1.0]
  with self.test_session():
    encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()

  example = tf.train.Example(features=tf.train.Features(
      feature={
          'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
          'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
          'image/height': dataset_util.int64_feature(4),
          'image/width': dataset_util.int64_feature(5),
          'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
          'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
          'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
          'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
          'image/object/class/label': dataset_util.int64_list_feature([2]),
          'image/object/mask': dataset_util.float_list_feature(flat_mask),
      }))

  # Use the writer as a context manager so the file is closed (and flushed)
  # even if serialization or the write itself raises.
  with tf.python_io.TFRecordWriter(path) as writer:
    writer.write(example.SerializeToString())
  return path
def testDecodeEmptyMultiClassScores(self):
  """Expects zero-size multiclass scores when the example stores none."""
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  ymins = [0.0, 4.0]
  xmins = [1.0, 5.0]
  ymaxs = [2.0, 6.0]
  xmaxs = [3.0, 7.0]

  # Boxes are present, but no multiclass-score feature is written.
  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder(load_multiclass_scores=True)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  scores = outputs[fields.InputDataFields.multiclass_scores]
  self.assertEqual(0, scores.size)
def testDecodeEmptyPngInstanceMasks(self):
  """Expects a [0, 10, 10] mask array when no PNG masks are stored."""
  pixels = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  no_masks = []

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/mask': dataset_util.bytes_list_feature(no_masks),
      'image/height': dataset_util.int64_feature(10),
      'image/width': dataset_util.int64_feature(10),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder(
      load_instance_masks=True,
      instance_mask_type=input_reader_pb2.PNG_MASKS)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  masks = outputs[fields.InputDataFields.groundtruth_instance_masks]
  self.assertAllEqual(masks.shape, [0, 10, 10])
def testDecodeDefaultGroundtruthWeights(self):
  """Expects weights of 1.0 per box when the example stores no weights."""
  input_fields = fields.InputDataFields
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  ymins = [0.0, 4.0]
  xmins = [1.0, 5.0]
  ymaxs = [2.0, 6.0]
  xmaxs = [3.0, 7.0]

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder()
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  static_box_shape = (
      decoded[input_fields.groundtruth_boxes].get_shape().as_list())
  self.assertAllEqual(static_box_shape, [None, 4])

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  # Two boxes were written, so two unit weights are expected.
  self.assertAllClose(outputs[input_fields.groundtruth_weights],
                      np.ones(2, dtype=np.float32))
def testDecodePngInstanceMasks(self):
  """Decodes two PNG-encoded binary masks and compares them to the source."""
  pixels = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)

  first_mask = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
  second_mask = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)

  first_png = self._EncodeImage(first_mask, encoding_type='png')
  first_expected = np.squeeze(first_mask.astype(np.float32))
  second_png = self._EncodeImage(second_mask, encoding_type='png')
  second_expected = np.squeeze(second_mask.astype(np.float32))

  png_masks = [first_png, second_png]
  # Expected result: float masks with the channel axis dropped, stacked
  # along a new leading axis.
  expected_masks = np.stack([first_expected, second_expected])

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/mask': dataset_util.bytes_list_feature(png_masks)
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder(
      load_instance_masks=True,
      instance_mask_type=input_reader_pb2.PNG_MASKS)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  self.assertAllEqual(
      expected_masks,
      outputs[fields.InputDataFields.groundtruth_instance_masks])
def testDecodeObjectGroupOf(self):
  """Checks that integer group_of flags are decoded as booleans."""
  group_of_key = fields.InputDataFields.groundtruth_group_of
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  group_of_flags = [0, 1]

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/group_of':
          dataset_util.int64_list_feature(group_of_flags),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder()
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  static_shape = decoded[group_of_key].get_shape().as_list()
  self.assertAllEqual(static_shape, [2])

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  expected_flags = [bool(flag) for flag in group_of_flags]
  self.assertAllEqual(expected_flags, outputs[group_of_key])
def testDecodeInstanceSegmentation(self):
  """Decodes flattened float instance masks together with class labels."""
  input_fields = fields.InputDataFields
  num_instances, image_height, image_width = 4, 5, 3

  # Random image content.
  pixels = np.random.randint(
      256, size=(image_height, image_width, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)

  # Random 0/1 masks, one per instance, flattened for storage.
  masks = np.random.randint(
      2, size=(num_instances, image_height, image_width)).astype(np.float32)
  flat_masks = np.reshape(masks, [-1])

  # Random class label for every instance.
  class_ids = np.random.randint(100, size=num_instances).astype(np.int64)

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/object/mask': dataset_util.float_list_feature(flat_masks),
      'image/object/class/label':
          dataset_util.int64_list_feature(class_ids)
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder(load_instance_masks=True)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  static_mask_shape = (
      decoded[input_fields.groundtruth_instance_masks].get_shape().as_list())
  self.assertAllEqual(static_mask_shape, [4, 5, 3])
  static_class_shape = (
      decoded[input_fields.groundtruth_classes].get_shape().as_list())
  self.assertAllEqual(static_class_shape, [4])

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  self.assertAllEqual(masks.astype(np.float32),
                      outputs[input_fields.groundtruth_instance_masks])
  self.assertAllEqual(class_ids, outputs[input_fields.groundtruth_classes])
def testDecodeImageLabels(self):
  """Decodes image-level labels given first as ids, then as text names."""
  image_classes_key = fields.InputDataFields.groundtruth_image_classes
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)

  # Case 1: numeric image-level labels stored directly in the example.
  id_features = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/class/label': dataset_util.int64_list_feature([1, 2]),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=id_features)).SerializeToString()
  decoder = tf_example_decoder.TfExampleDecoder()
  decoded = decoder.decode(tf.convert_to_tensor(serialized))
  with self.test_session() as sess:
    outputs = sess.run(decoded)
  self.assertTrue(image_classes_key in outputs)
  self.assertAllEqual(outputs[image_classes_key], np.array([1, 2]))

  # Case 2: text labels, resolved to ids through a label map file.
  text_features = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/class/text':
          dataset_util.bytes_list_feature([six.b('dog'), six.b('cat')]),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=text_features)).SerializeToString()
  label_map_string = """ item { id:3 name:'cat' } item { id:1 name:'dog' } """
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
  with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
    label_map_file.write(label_map_string)

  decoder = tf_example_decoder.TfExampleDecoder(
      label_map_proto_file=label_map_path)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))
  with self.test_session() as sess:
    # Tables must be initialized before the decoded tensors are evaluated.
    sess.run(tf.tables_initializer())
    outputs = sess.run(decoded)
  self.assertTrue(image_classes_key in outputs)
  self.assertAllEqual(outputs[image_classes_key], np.array([1, 3]))
def testDecodeKeypoint(self):
  """Decodes boxes plus three (y, x) keypoints for each of two objects."""
  input_fields = fields.InputDataFields
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  ymins = [0.0, 4.0]
  xmins = [1.0, 5.0]
  ymaxs = [2.0, 6.0]
  xmaxs = [3.0, 7.0]
  point_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
  point_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/keypoint/y': dataset_util.float_list_feature(point_ys),
      'image/object/keypoint/x': dataset_util.float_list_feature(point_xs),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  static_box_shape = (
      decoded[input_fields.groundtruth_boxes].get_shape().as_list())
  self.assertAllEqual(static_box_shape, [None, 4])
  static_keypoint_shape = (
      decoded[input_fields.groundtruth_keypoints].get_shape().as_list())
  self.assertAllEqual(static_keypoint_shape, [2, 3, 2])

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  # One row per box, columns ordered [ymin, xmin, ymax, xmax].
  box_rows = np.vstack([ymins, xmins, ymaxs, xmaxs])
  expected_boxes = box_rows.transpose()
  self.assertAllEqual(expected_boxes,
                      outputs[input_fields.groundtruth_boxes])

  # (y, x) pairs regrouped as [object, keypoint, coordinate].
  point_pairs = np.vstack([point_ys, point_xs]).transpose()
  expected_keypoints = point_pairs.reshape((2, 3, 2))
  self.assertAllEqual(expected_keypoints,
                      outputs[input_fields.groundtruth_keypoints])
def create_tf_record(self, has_additional_channels=False, num_examples=1):
  """Writes `num_examples` examples to a TFRecord file in the temp directory.

  Every example shares the same random 4x5 RGB JPEG, one box spanning [0, 1]
  on both axes with class label 2, and a flat mask of 4 * 5 ones. Examples
  differ only in 'image/source_id', which is the example index.

  Args:
    has_additional_channels: If True, each example also stores the same
      JPEG-encoded 4x5x1 random image twice under
      'image/additional_channels/encoded'.
    num_examples: Number of examples to write.

  Returns:
    Path of the TFRecord file that was written.
  """
  path = os.path.join(self.get_temp_dir(), 'tfrecord')

  image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
  additional_channels_tensor = np.random.randint(
      255, size=(4, 5, 1)).astype(np.uint8)
  flat_mask = (4 * 5) * [1.0]
  with self.test_session():
    encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
    encoded_additional_channels_jpeg = tf.image.encode_jpeg(
        tf.constant(additional_channels_tensor)).eval()

  # Use the writer as a context manager so the file is closed (and flushed)
  # even if building or writing an example raises.
  with tf.python_io.TFRecordWriter(path) as writer:
    for i in range(num_examples):
      features = {
          # Bytes features need bytes: a plain str(i) is text on Python 3
          # and is rejected there, so encode it explicitly.
          'image/source_id':
              dataset_util.bytes_feature(str(i).encode('utf8')),
          'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
          'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
          'image/height': dataset_util.int64_feature(4),
          'image/width': dataset_util.int64_feature(5),
          'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
          'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
          'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
          'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
          'image/object/class/label': dataset_util.int64_list_feature([2]),
          'image/object/mask': dataset_util.float_list_feature(flat_mask),
      }
      if has_additional_channels:
        additional_channels_key = 'image/additional_channels/encoded'
        features[additional_channels_key] = dataset_util.bytes_list_feature(
            [encoded_additional_channels_jpeg] * 2)
      example = tf.train.Example(
          features=tf.train.Features(feature=features))
      writer.write(example.SerializeToString())
  return path
def testDecodeObjectLabelWithText(self):
  """Expects label-map ids for the class text to replace the stored labels."""
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  class_names = [six.b('cat'), six.b('dog')]
  # Annotation label gets overridden by labelmap id.
  stored_class_ids = [3, 4]
  expected_class_ids = [1, 2]

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/class/text':
          dataset_util.bytes_list_feature(class_names),
      'image/object/class/label':
          dataset_util.int64_list_feature(stored_class_ids),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  label_map_string = """ item { id:1 name:'cat' } item { id:2 name:'dog' } """
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
  with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
    label_map_file.write(label_map_string)

  decoder = tf_example_decoder.TfExampleDecoder(
      label_map_proto_file=label_map_path)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))
  tables_init = tf.tables_initializer()

  with self.test_session() as sess:
    sess.run(tables_init)
    outputs = sess.run(decoded)

  self.assertAllEqual(expected_class_ids,
                      outputs[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self):
  """Expects display-name matches to map to ids and unknown text to -1."""
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)
  # 'cat' is a display_name in the label map below; 'cheetah' is absent.
  class_names = [six.b('cat'), six.b('cheetah')]
  stored_class_ids = [5, 6]

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/object/class/text':
          dataset_util.bytes_list_feature(class_names),
      'image/object/class/label':
          dataset_util.int64_list_feature(stored_class_ids),
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  label_map_string = """ item { name:'/m/cat' id:3 display_name:'cat' } item { name:'/m/dog' id:1 display_name:'dog' } """
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
  with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
    label_map_file.write(label_map_string)

  decoder = tf_example_decoder.TfExampleDecoder(
      label_map_proto_file=label_map_path)
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  with self.test_session() as sess:
    sess.run(tf.tables_initializer())
    outputs = sess.run(decoded)

  self.assertAllEqual([3, -1],
                      outputs[fields.InputDataFields.groundtruth_classes])
def testDecodeImageKeyAndFilename(self):
  """Checks that the sha256 key and filename features are decoded as-is."""
  input_fields = fields.InputDataFields
  pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)

  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/key/sha256': dataset_util.bytes_feature(six.b('abc')),
      'image/filename': dataset_util.bytes_feature(six.b('filename'))
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder()
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  with self.test_session() as sess:
    outputs = sess.run(decoded)

  self.assertEqual(six.b('abc'), outputs[input_fields.key])
  self.assertEqual(six.b('filename'), outputs[input_fields.filename])
def testInstancesNotAvailableByDefault(self):
  """Expects no instance-mask entry when the decoder uses default options."""
  num_instances, image_height, image_width = 4, 5, 3

  # Random image content.
  pixels = np.random.randint(
      256, size=(image_height, image_width, 3)).astype(np.uint8)
  image_bytes = self._EncodeImage(pixels)

  # Random 0/1 masks, one per instance, flattened for storage.
  masks = np.random.randint(
      2, size=(num_instances, image_height, image_width)).astype(np.float32)
  flat_masks = np.reshape(masks, [-1])

  # Random class label for every instance.
  class_ids = np.random.randint(100, size=num_instances).astype(np.int64)

  # Masks are written to the example even though they should not be decoded.
  feature_map = {
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(six.b('jpeg')),
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/object/mask': dataset_util.float_list_feature(flat_masks),
      'image/object/class/label':
          dataset_util.int64_list_feature(class_ids)
  }
  serialized = tf.train.Example(
      features=tf.train.Features(feature=feature_map)).SerializeToString()

  decoder = tf_example_decoder.TfExampleDecoder()
  decoded = decoder.decode(tf.convert_to_tensor(serialized))

  masks_key = fields.InputDataFields.groundtruth_instance_masks
  self.assertTrue(masks_key not in decoded)
def create_mock_tfrecord():
  """Writes a one-example TFRecord to the path from get_mock_tfrecord_path().

  The example contains a 1x1 RGB image (pixel value [123, 0, 0]) encoded as
  PNG, plus a float-list feature 'test_field' holding [1, 2, 3, 4].
  """
  # Imported locally so this function does not depend on the Python 2-only
  # `StringIO` module; io.BytesIO is a binary buffer on both Python 2 and 3,
  # which is what PIL needs when saving encoded image bytes.
  import io

  pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8), 'RGB')
  image_output_stream = io.BytesIO()
  pil_image.save(image_output_stream, format='png')
  encoded_image = image_output_stream.getvalue()

  feature_map = {
      'test_field':
          dataset_util.float_list_feature([1, 2, 3, 4]),
      standard_fields.TfExampleFields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  tf_example = tf.train.Example(
      features=tf.train.Features(feature=feature_map))
  with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
    writer.write(tf_example.SerializeToString())
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Builds a tf.Example from one image's parsed PASCAL XML annotation.

  Box coordinates from the annotation are divided by the image width/height
  taken from data['size'], so the stored boxes are normalized.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict).
    dataset_directory: Path to the root directory holding the PASCAL dataset.
    label_map_dict: A map from string label names to integer ids.
    ignore_difficult_instances: Whether objects flagged as difficult are left
      out of the example (default: False).
    image_subdirectory: Subdirectory, under data['folder'], that holds the
      image files.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid
      JPEG.
  """
  relative_image_path = os.path.join(
      data['folder'], image_subdirectory, data['filename'])
  image_file_path = os.path.join(dataset_directory, relative_image_path)
  with tf.gfile.GFile(image_file_path, 'rb') as fid:
    jpeg_bytes = fid.read()

  # Only the container format is inspected; the pixels are stored as read.
  if PIL.Image.open(io.BytesIO(jpeg_bytes)).format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(jpeg_bytes).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins, ymins, xmaxs, ymaxs = [], [], [], []
  class_ids, class_names = [], []
  truncated_flags, view_names, difficult_flags = [], [], []

  annotated_objects = data['object'] if 'object' in data else []
  for annotation in annotated_objects:
    is_difficult = bool(int(annotation['difficult']))
    if is_difficult and ignore_difficult_instances:
      continue

    difficult_flags.append(int(is_difficult))

    xmins.append(float(annotation['bndbox']['xmin']) / width)
    ymins.append(float(annotation['bndbox']['ymin']) / height)
    xmaxs.append(float(annotation['bndbox']['xmax']) / width)
    ymaxs.append(float(annotation['bndbox']['ymax']) / height)
    class_names.append(annotation['name'].encode('utf8'))
    class_ids.append(label_map_dict[annotation['name']])
    truncated_flags.append(int(annotation['truncated']))
    view_names.append(annotation['pose'].encode('utf8'))

  filename_bytes = data['filename'].encode('utf8')
  feature_map = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename_bytes),
      'image/source_id': dataset_util.bytes_feature(filename_bytes),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text':
          dataset_util.bytes_list_feature(class_names),
      'image/object/class/label':
          dataset_util.int64_list_feature(class_ids),
      'image/object/difficult':
          dataset_util.int64_list_feature(difficult_flags),
      'image/object/truncated':
          dataset_util.int64_list_feature(truncated_flags),
      'image/object/view': dataset_util.bytes_list_feature(view_names),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature_map))
def prepare_example(image_path, annotations, label_map_dict):
  """Builds a tf.Example from an image file and its annotation fields.

  The 2D box edges are divided by the image width/height read from the
  decoded image, so the stored boxes are normalized.

  Args:
    image_path: The complete path to the image.
    annotations: A dictionary of annotation fields. The box fields are divided
      by a scalar and measured with len(), so they are presumably numpy arrays
      with one entry per object -- confirm against the caller.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
  with tf.gfile.GFile(image_path, 'rb') as fid:
    png_bytes = fid.read()
  pixels = np.asarray(pil.open(io.BytesIO(png_bytes)))

  key = hashlib.sha256(png_bytes).hexdigest()

  # Array layout is (rows, columns, ...), i.e. height first, then width.
  width = int(pixels.shape[1])
  height = int(pixels.shape[0])

  xmin_norm = annotations['2d_bbox_left'] / float(width)
  ymin_norm = annotations['2d_bbox_top'] / float(height)
  xmax_norm = annotations['2d_bbox_right'] / float(width)
  ymax_norm = annotations['2d_bbox_bottom'] / float(height)

  # No object is marked difficult: one zero per box.
  difficult_flags = [0] * len(xmin_norm)

  path_bytes = image_path.encode('utf8')
  object_types = annotations['type']
  type_names = [type_name.encode('utf8') for type_name in object_types]
  type_ids = [label_map_dict[type_name] for type_name in object_types]

  feature_map = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(path_bytes),
      'image/source_id': dataset_util.bytes_feature(path_bytes),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(png_bytes),
      'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
      'image/object/class/text': dataset_util.bytes_list_feature(type_names),
      'image/object/class/label': dataset_util.int64_list_feature(type_ids),
      'image/object/difficult':
          dataset_util.int64_list_feature(difficult_flags),
      'image/object/truncated':
          dataset_util.float_list_feature(annotations['truncated']),
      'image/object/alpha':
          dataset_util.float_list_feature(annotations['alpha']),
      'image/object/3d_bbox/height':
          dataset_util.float_list_feature(annotations['3d_bbox_height']),
      'image/object/3d_bbox/width':
          dataset_util.float_list_feature(annotations['3d_bbox_width']),
      'image/object/3d_bbox/length':
          dataset_util.float_list_feature(annotations['3d_bbox_length']),
      'image/object/3d_bbox/x':
          dataset_util.float_list_feature(annotations['3d_bbox_x']),
      'image/object/3d_bbox/y':
          dataset_util.float_list_feature(annotations['3d_bbox_y']),
      'image/object/3d_bbox/z':
          dataset_util.float_list_feature(annotations['3d_bbox_z']),
      'image/object/3d_bbox/rot_y':
          dataset_util.float_list_feature(annotations['3d_bbox_rot_y']),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature_map))
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
  """Populates a TF Example message with image annotations from a data frame.

  Rows whose LabelName is not in `label_map` are dropped. Of the remaining
  rows, those with a non-null YMin are treated as box annotations and those
  with a null YMin as image-level labels.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image. Must have ImageID, LabelName, XMin, XMax, YMin and YMax columns;
      IsGroupOf, IsOccluded, IsTruncated, IsDepiction and ConfidenceImageLabel
      are used when present.
    label_map: String to integer label map.
    encoded_image: The encoded image string.

  Returns:
    The populated tf.train.Example. An example is always returned; if no row
    matches `label_map`, its annotation lists are empty.
  """

  def _as_bytes(value):
    """Returns bytes unchanged and UTF-8 encodes text (needed on Python 3)."""
    return value if isinstance(value, bytes) else value.encode('utf8')

  example_fields = standard_fields.TfExampleFields

  filtered_data_frame = annotations_data_frame[
      annotations_data_frame.LabelName.isin(label_map)]
  filtered_data_frame_boxes = filtered_data_frame[
      ~filtered_data_frame.YMin.isnull()]
  filtered_data_frame_labels = filtered_data_frame[
      filtered_data_frame.YMin.isnull()]
  image_id = annotations_data_frame.ImageID.iloc[0]

  # `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
  # removed in 1.0; `.values` is available in both old and new versions.
  feature_map = {
      example_fields.object_bbox_ymin:
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.YMin.values),
      example_fields.object_bbox_xmin:
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.XMin.values),
      example_fields.object_bbox_ymax:
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.YMax.values),
      example_fields.object_bbox_xmax:
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.XMax.values),
      example_fields.object_class_text:
          dataset_util.bytes_list_feature([
              _as_bytes(label_name)
              for label_name in filtered_data_frame_boxes.LabelName.values
          ]),
      example_fields.object_class_label:
          dataset_util.int64_list_feature(
              filtered_data_frame_boxes.LabelName.map(
                  lambda x: label_map[x]).values),
      example_fields.filename:
          dataset_util.bytes_feature(_as_bytes('{}.jpg'.format(image_id))),
      example_fields.source_id:
          dataset_util.bytes_feature(_as_bytes(image_id)),
      example_fields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  # Optional per-box boolean attributes, stored as 0/1 integers.
  optional_box_columns = (
      ('IsGroupOf', example_fields.object_group_of),
      ('IsOccluded', example_fields.object_occluded),
      ('IsTruncated', example_fields.object_truncated),
      ('IsDepiction', example_fields.object_depiction),
  )
  for column_name, feature_key in optional_box_columns:
    if column_name in filtered_data_frame.columns:
      feature_map[feature_key] = dataset_util.int64_list_feature(
          filtered_data_frame_boxes[column_name].values.astype(int))

  if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
    feature_map[example_fields.image_class_label] = (
        dataset_util.int64_list_feature(
            filtered_data_frame_labels.LabelName.map(
                lambda x: label_map[x]).values))
    # No trailing comma here: the original stored a 1-tuple wrapping the
    # Feature instead of the Feature itself.
    feature_map[example_fields.image_class_text] = (
        dataset_util.bytes_list_feature([
            _as_bytes(label_name)
            for label_name in filtered_data_frame_labels.LabelName.values
        ]))

  return tf.train.Example(features=tf.train.Features(feature=feature_map))
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list: list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: SHA256 hex digest of the encoded image bytes.
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.
      An annotation is skipped when its box has non-positive width or height,
      or extends past the right or bottom edge of the image.

  Note:
    The image bytes are opened with PIL, so any exception PIL raises while
    opening them propagates to the caller. No explicit check that the image is
    a JPEG is made here, although the example is always tagged as 'jpeg'.
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  # The opened image itself is not needed; the call is kept so that bytes PIL
  # cannot open still fail here, exactly as before.
  PIL.Image.open(io.BytesIO(encoded_jpg))
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Degenerate boxes and boxes spilling over the right/bottom image border
    # are counted and dropped.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue

    # Absolute [x, y, w, h] -> normalized corner coordinates.
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        # Collapse the last axis of the decoded mask into a single 2-D mask
        # (presumably one slice per polygon -- verify against pycocotools).
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())

  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/text':
          dataset_util.bytes_list_feature(category_names),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask. The mask is read and checked
      even when `faces_only` is True.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes. Only consulted when `faces_only` is False; any other
      value results in no 'image/object/mask' feature being written.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid
      JPEG, or if the file at `mask_path` is not a PNG.
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  image = PIL.Image.open(io.BytesIO(encoded_jpg))
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  mask_image = PIL.Image.open(io.BytesIO(encoded_mask_png))
  if mask_image.format != 'PNG':
    raise ValueError('Mask format not PNG')

  # Pixels equal to 2 are treated as background; everything else counts as
  # part of the object. Record which columns / rows contain any object pixel.
  mask_np = np.asarray(mask_image)
  nonbackground_indices_x = np.any(mask_np != 2, axis=0)
  nonbackground_indices_y = np.any(mask_np != 2, axis=1)
  nonzero_x_indices = np.where(nonbackground_indices_x)
  nonzero_y_indices = np.where(nonbackground_indices_y)

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  masks = []
  if 'object' in data:
    for obj in data['object']:
      difficult = bool(int(obj['difficult']))
      if ignore_difficult_instances and difficult:
        continue
      difficult_obj.append(int(difficult))

      if faces_only:
        # Face box as annotated in the XML.
        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])
      else:
        # Body box: tight extent of the non-background mask pixels.
        xmin = float(np.min(nonzero_x_indices))
        xmax = float(np.max(nonzero_x_indices))
        ymin = float(np.min(nonzero_y_indices))
        ymax = float(np.max(nonzero_y_indices))

      xmins.append(xmin / width)
      ymins.append(ymin / height)
      xmaxs.append(xmax / width)
      ymaxs.append(ymax / height)
      # The class comes from the image file name, not from the XML object.
      class_name = get_class_name_from_filename(data['filename'])
      classes_text.append(class_name.encode('utf8'))
      classes.append(label_map_dict[class_name])
      truncated.append(int(obj['truncated']))
      poses.append(obj['pose'].encode('utf8'))
      if not faces_only:
        mask_remapped = (mask_np != 2).astype(np.uint8)
        masks.append(mask_remapped)

  feature_dict = {
      'image/height':
          dataset_util.int64_feature(height),
      'image/width':
          dataset_util.int64_feature(width),
      'image/filename':
          dataset_util.bytes_feature(data['filename'].encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(data['filename'].encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymaxs),
      'image/object/class/text':
          dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label':
          dataset_util.int64_list_feature(classes),
      'image/object/difficult':
          dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated':
          dataset_util.int64_list_feature(truncated),
      'image/object/view':
          dataset_util.bytes_list_feature(poses),
  }
  if not faces_only:
    if mask_type == 'numerical':
      if masks:
        mask_stack = np.stack(masks).astype(np.float32)
        masks_flattened = np.reshape(mask_stack, [-1]).tolist()
      else:
        # np.stack rejects an empty sequence; with no retained objects write
        # an empty mask feature, matching what the 'png' branch does.
        masks_flattened = []
      feature_dict['image/object/mask'] = (
          dataset_util.float_list_feature(masks_flattened))
    elif mask_type == 'png':
      encoded_mask_png_list = []
      for object_mask in masks:
        img = PIL.Image.fromarray(object_mask)
        output = io.BytesIO()
        img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())
      feature_dict['image/object/mask'] = (
          dataset_util.bytes_list_feature(encoded_mask_png_list))

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example