def testDecodeObjectGroupOf(self):
  image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image_tensor)
  object_group_of = [0, 1]
  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded':
                  dataset_util.bytes_feature(encoded_jpeg),
              'image/format':
                  dataset_util.bytes_feature('jpeg'),
              'image/object/group_of':
                  dataset_util.int64_list_feature(object_group_of),
          })).SerializeToString()

  example_decoder = tf_example_decoder.TfExampleDecoder()
  tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

  self.assertAllEqual(
      (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
       .as_list()), [2])
  with self.test_session() as sess:
    tensor_dict = sess.run(tensor_dict)
    self.assertAllEqual(
        [bool(item) for item in object_group_of],
        tensor_dict[fields.InputDataFields.groundtruth_group_of])
def create_tf_example(example, writer):
    height = example['height']
    width = example['width']
    filename = example['filename']
    encoded_image_data = example['encoded_image_data']
    image_format = example['image_format']
    bboxes = example['bbox']

    # Lists of normalized coordinates, one entry per box.
    xmins = [bbox[0] / float(width) for bbox in bboxes]   # normalized left x
    xmaxs = [bbox[2] / float(width) for bbox in bboxes]   # normalized right x
    ymins = [bbox[1] / float(height) for bbox in bboxes]  # normalized top y
    ymaxs = [bbox[3] / float(height) for bbox in bboxes]  # normalized bottom y

    classes_text = example['class_text']
    classes = example['class_idx']

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
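# The function above expects a pre-assembled `example` dict; a minimal,
# hedged sketch of driving it is shown here. Only the dict keys are taken
# from create_tf_example itself -- the image path, record path, box values
# and class names below are illustrative placeholders, not part of the
# original snippet.
import io
import tensorflow as tf
from object_detection.utils import dataset_util

with tf.gfile.GFile('images/example.jpg', 'rb') as fid:  # placeholder path
    encoded = fid.read()

example = {
    'height': 480,
    'width': 640,
    'filename': 'example.jpg',
    'encoded_image_data': encoded,
    'image_format': 'jpeg',
    'bbox': [[10, 20, 100, 200]],   # [xmin, ymin, xmax, ymax] in pixels
    'class_text': ['dog'],
    'class_idx': [1],
}

writer = tf.python_io.TFRecordWriter('train.record')  # placeholder output
create_tf_example(example, writer)
writer.close()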
def testDecodePngInstanceMasks(self):
  image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image_tensor)
  mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
  mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
  encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png')
  decoded_png_1 = np.squeeze(mask_1.astype(np.float32))
  encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png')
  decoded_png_2 = np.squeeze(mask_2.astype(np.float32))
  encoded_masks = [encoded_png_1, encoded_png_2]
  decoded_masks = np.stack([decoded_png_1, decoded_png_2])

  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded':
                  dataset_util.bytes_feature(encoded_jpeg),
              'image/format':
                  dataset_util.bytes_feature('jpeg'),
              'image/object/mask':
                  dataset_util.bytes_list_feature(encoded_masks)
          })).SerializeToString()

  example_decoder = tf_example_decoder.TfExampleDecoder(
      load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
  tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

  with self.test_session() as sess:
    tensor_dict = sess.run(tensor_dict)
    self.assertAllEqual(
        decoded_masks,
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
def testDecodeJpegImage(self):
  image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image_tensor)
  decoded_jpeg = self._DecodeImage(encoded_jpeg)
  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded':
                  dataset_util.bytes_feature(encoded_jpeg),
              'image/format':
                  dataset_util.bytes_feature('jpeg'),
              'image/source_id':
                  dataset_util.bytes_feature('image_id'),
          })).SerializeToString()

  example_decoder = tf_example_decoder.TfExampleDecoder()
  tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

  self.assertAllEqual(
      (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
      [None, None, 3])
  self.assertAllEqual(
      (tensor_dict[fields.InputDataFields.original_image_spatial_shape]
       .get_shape().as_list()), [2])
  with self.test_session() as sess:
    tensor_dict = sess.run(tensor_dict)
    self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
    self.assertAllEqual(
        [4, 5],
        tensor_dict[fields.InputDataFields.original_image_spatial_shape])
    self.assertEqual('image_id',
                     tensor_dict[fields.InputDataFields.source_id])
def testDecodeEmptyPngInstanceMasks(self):
  image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image_tensor)
  encoded_masks = []
  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded':
                  dataset_util.bytes_feature(encoded_jpeg),
              'image/format':
                  dataset_util.bytes_feature('jpeg'),
              'image/object/mask':
                  dataset_util.bytes_list_feature(encoded_masks),
              'image/height':
                  dataset_util.int64_feature(10),
              'image/width':
                  dataset_util.int64_feature(10),
          })).SerializeToString()

  example_decoder = tf_example_decoder.TfExampleDecoder(
      load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
  tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

  with self.test_session() as sess:
    tensor_dict = sess.run(tensor_dict)
    self.assertAllEqual(
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape,
        [0, 10, 10])
def testDecodeDefaultGroundtruthWeights(self):
  image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image_tensor)
  bbox_ymins = [0.0, 4.0]
  bbox_xmins = [1.0, 5.0]
  bbox_ymaxs = [2.0, 6.0]
  bbox_xmaxs = [3.0, 7.0]
  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded':
                  dataset_util.bytes_feature(encoded_jpeg),
              'image/format':
                  dataset_util.bytes_feature('jpeg'),
              'image/object/bbox/ymin':
                  dataset_util.float_list_feature(bbox_ymins),
              'image/object/bbox/xmin':
                  dataset_util.float_list_feature(bbox_xmins),
              'image/object/bbox/ymax':
                  dataset_util.float_list_feature(bbox_ymaxs),
              'image/object/bbox/xmax':
                  dataset_util.float_list_feature(bbox_xmaxs),
          })).SerializeToString()

  example_decoder = tf_example_decoder.TfExampleDecoder()
  tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

  self.assertAllEqual(
      (tensor_dict[fields.InputDataFields.groundtruth_boxes].get_shape()
       .as_list()), [None, 4])
  with self.test_session() as sess:
    tensor_dict = sess.run(tensor_dict)
    self.assertAllClose(
        tensor_dict[fields.InputDataFields.groundtruth_weights],
        np.ones(2, dtype=np.float32))
def testDecodeObjectWeight(self):
  image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image_tensor)
  object_weights = [0.75, 1.0]
  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded':
                  dataset_util.bytes_feature(encoded_jpeg),
              'image/format':
                  dataset_util.bytes_feature('jpeg'),
              'image/object/weight':
                  dataset_util.float_list_feature(object_weights),
          })).SerializeToString()

  example_decoder = tf_example_decoder.TfExampleDecoder()
  tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

  self.assertAllEqual(
      (tensor_dict[fields.InputDataFields.groundtruth_weights].get_shape()
       .as_list()), [None])
  with self.test_session() as sess:
    tensor_dict = sess.run(tensor_dict)
    self.assertAllEqual(
        object_weights,
        tensor_dict[fields.InputDataFields.groundtruth_weights])
def create_tf_example(row):
    full_path = os.path.join(os.getcwd(), 'images', '{}'.format(row['filename']))
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = row['filename'].encode('utf8')
    image_format = b'jpg'
    xmins = [row['xmin'] / width]
    xmaxs = [row['xmax'] / width]
    ymins = [row['ymin'] / height]
    ymaxs = [row['ymax'] / height]
    classes_text = [row['class'].encode('utf8')]
    classes = [class_text_to_int(row['class'])]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def testDecodeAdditionalChannels(self):
  image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image_tensor)
  additional_channel_tensor = np.random.randint(
      256, size=(4, 5, 1)).astype(np.uint8)
  encoded_additional_channel = self._EncodeImage(additional_channel_tensor)
  decoded_additional_channel = self._DecodeImage(encoded_additional_channel)

  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded':
                  dataset_util.bytes_feature(encoded_jpeg),
              'image/additional_channels/encoded':
                  dataset_util.bytes_list_feature(
                      [encoded_additional_channel] * 2),
              'image/format':
                  dataset_util.bytes_feature('jpeg'),
              'image/source_id':
                  dataset_util.bytes_feature('image_id'),
          })).SerializeToString()

  example_decoder = tf_example_decoder.TfExampleDecoder(
      num_additional_channels=2)
  tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

  with self.test_session() as sess:
    tensor_dict = sess.run(tensor_dict)
    self.assertAllEqual(
        np.concatenate([decoded_additional_channel] * 2, axis=2),
        tensor_dict[fields.InputDataFields.image_additional_channels])
def testDecodeInstanceSegmentation(self): num_instances = 4 image_height = 5 image_width = 3 # Randomly generate image. image_tensor = np.random.randint( 256, size=(image_height, image_width, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) # Randomly generate instance segmentation masks. instance_masks = ( np.random.randint(2, size=(num_instances, image_height, image_width)).astype(np.float32)) instance_masks_flattened = np.reshape(instance_masks, [-1]) # Randomly generate class labels for each instance. object_classes = np.random.randint( 100, size=(num_instances)).astype(np.int64) example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/object/mask': dataset_util.float_list_feature(instance_masks_flattened), 'image/object/class/label': dataset_util.int64_list_feature(object_classes) })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder( load_instance_masks=True) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) self.assertAllEqual( (tensor_dict[fields.InputDataFields.groundtruth_instance_masks] .get_shape().as_list()), [4, 5, 3]) self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes] .get_shape().as_list()), [4]) with self.test_session() as sess: tensor_dict = sess.run(tensor_dict) self.assertAllEqual( instance_masks.astype(np.float32), tensor_dict[fields.InputDataFields.groundtruth_instance_masks]) self.assertAllEqual(object_classes, tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeKeypoint(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) bbox_ymins = [0.0, 4.0] bbox_xmins = [1.0, 5.0] bbox_ymaxs = [2.0, 6.0] bbox_xmaxs = [3.0, 7.0] keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/object/bbox/ymin': dataset_util.float_list_feature(bbox_ymins), 'image/object/bbox/xmin': dataset_util.float_list_feature(bbox_xmins), 'image/object/bbox/ymax': dataset_util.float_list_feature(bbox_ymaxs), 'image/object/bbox/xmax': dataset_util.float_list_feature(bbox_xmaxs), 'image/object/keypoint/y': dataset_util.float_list_feature(keypoint_ys), 'image/object/keypoint/x': dataset_util.float_list_feature(keypoint_xs), })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes] .get_shape().as_list()), [None, 4]) self.assertAllEqual( (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape() .as_list()), [2, 3, 2]) with self.test_session() as sess: tensor_dict = sess.run(tensor_dict) expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs, bbox_xmaxs]).transpose() self.assertAllEqual(expected_boxes, tensor_dict[fields.InputDataFields.groundtruth_boxes]) self.assertAllEqual( 2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes]) expected_keypoints = ( np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2))) self.assertAllEqual( expected_keypoints, tensor_dict[fields.InputDataFields.groundtruth_keypoints])
def testDecodeImageLabels(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/class/label': dataset_util.int64_list_feature([1, 2]), })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder() tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: tensor_dict = sess.run(tensor_dict) self.assertTrue( fields.InputDataFields.groundtruth_image_classes in tensor_dict) self.assertAllEqual( tensor_dict[fields.InputDataFields.groundtruth_image_classes], np.array([1, 2])) example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/class/text': dataset_util.bytes_list_feature(['dog', 'cat']), })).SerializeToString() label_map_string = """ item { id:3 name:'cat' } item { id:1 name:'dog' } """ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') with tf.gfile.Open(label_map_path, 'wb') as f: f.write(label_map_string) example_decoder = tf_example_decoder.TfExampleDecoder( label_map_proto_file=label_map_path) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: sess.run(tf.tables_initializer()) tensor_dict = sess.run(tensor_dict) self.assertTrue( fields.InputDataFields.groundtruth_image_classes in tensor_dict) self.assertAllEqual( tensor_dict[fields.InputDataFields.groundtruth_image_classes], np.array([1, 3]))
def dict_to_tf_example(data, label_map_dict, data_dir):
    """Converts one mask/image pair to a tf.Example proto.

    `data` is the mask filename relative to `data_dir`; the matching JPEG is
    located by replacing "mask" with "images" in that path. Only a single
    'water' instance mask is attached; no bounding boxes are written.
    """
    img_path = os.path.join(data_dir, data.replace("mask", "images"))
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Images in this dataset are assumed to be 512x512.
    width = 512
    height = 512

    classes = []
    classes_text = []
    encoded_mask_png_list = []

    # Read the mask as a single-channel image, binarize it, and re-encode as PNG.
    mask_np = cv2.imread(os.path.join(data_dir, data), 0) / 255
    mask_img = PIL.Image.fromarray(mask_np.astype(np.uint8))
    output = io.BytesIO()
    mask_img.save(output, format='PNG')
    encoded_mask_png_list.append(output.getvalue())

    class_name = 'water'
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(data.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(data.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
def createTfExample(singleFileData, path): # use TensorFlow's GFile function to open the .jpg image matching the current box data with tf.gfile.GFile(os.path.join(path, '{}'.format(singleFileData.filename)), 'rb') as tensorFlowImageFile: tensorFlowImage = tensorFlowImageFile.read() # end with # get the image width and height via converting from a TensorFlow image to an io library BytesIO image, # then to a PIL Image, then breaking out the width and height bytesIoImage = io.BytesIO(tensorFlowImage) pilImage = Image.open(bytesIoImage) width, height = pilImage.size # get the file name from the file data passed in, and set the image format to .jpg fileName = singleFileData.filename.encode('utf8') imageFormat = b'jpg' # declare empty lists for the box x, y, mins and maxes, and the class as text and as an integer xMins = [] xMaxs = [] yMins = [] yMaxs = [] classesAsText = [] classesAsInts = [] # for each row in the current .xml file's data . . . (each row in the .xml file corresponds to one box) for index, row in singleFileData.object.iterrows(): xMins.append(row['xmin'] / width) xMaxs.append(row['xmax'] / width) yMins.append(row['ymin'] / height) yMaxs.append(row['ymax'] / height) classesAsText.append(row['class'].encode('utf8')) classesAsInts.append(classAsTextToClassAsInt(row['class'])) # end for # finally we can calculate and return the TensorFlow Example tfExample = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(fileName), 'image/source_id': dataset_util.bytes_feature(fileName), 'image/encoded': dataset_util.bytes_feature(tensorFlowImage), 'image/format': dataset_util.bytes_feature(imageFormat), 'image/object/bbox/xmin': dataset_util.float_list_feature(xMins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xMaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(yMins), 'image/object/bbox/ymax': dataset_util.float_list_feature(yMaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classesAsText), 'image/object/class/label': dataset_util.int64_list_feature(classesAsInts)})) return tfExample
def create_tf_example(filename, writer): lines = open(filename).readlines() image_filename = lines[0].strip()[1:] classes_text = [] classes = [] xmins = [] xmaxs = [] ymins = [] ymaxs = [] im = Image.open(image_filename) arr = io.BytesIO() im.save(arr, format='PNG') height = im.height width = im.width encoded_image_data = arr.getvalue() image_format = 'png' for line in lines[1:]: line = line.strip() if line == '': continue data = line.split(",") bbox = list(map(int, map(float, data[:4]))) class_text = data[4].strip() class_idx = labels.index(class_text) classes_text.append(class_text) classes.append(class_idx) xmins.append(bbox[0]/float(width)) xmaxs.append(bbox[2]/float(width)) # List of normalized right x coordinates in bounding box ymins.append(bbox[1]/float(height)) # List of normalized top y coordinates in bounding box (1 per box) ymaxs.append(bbox[3]/float(height)) # List of normalized bottom y coordinates in bounding box tf_example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")), 'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")), 'image/encoded': dataset_util.bytes_feature(encoded_image_data), 'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature([bytes(t, "utf-8") for t in classes_text]), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) writer.write(tf_example.SerializeToString())
def testDecodeObjectLabelWithText(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) bbox_classes_text = ['cat', 'dog'] # Annotation label gets overridden by labelmap id. annotated_bbox_classes = [3, 4] expected_bbox_classes = [1, 2] example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/object/class/text': dataset_util.bytes_list_feature(bbox_classes_text), 'image/object/class/label': dataset_util.int64_list_feature(annotated_bbox_classes), })).SerializeToString() label_map_string = """ item { id:1 name:'cat' } item { id:2 name:'dog' } """ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') with tf.gfile.Open(label_map_path, 'wb') as f: f.write(label_map_string) example_decoder = tf_example_decoder.TfExampleDecoder( label_map_proto_file=label_map_path) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) init = tf.tables_initializer() with self.test_session() as sess: sess.run(init) tensor_dict = sess.run(tensor_dict) self.assertAllEqual(expected_bbox_classes, tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeImageKeyAndFilename(self):
  image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
  encoded_jpeg = self._EncodeImage(image_tensor)
  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded':
                  dataset_util.bytes_feature(encoded_jpeg),
              'image/key/sha256':
                  dataset_util.bytes_feature('abc'),
              'image/filename':
                  dataset_util.bytes_feature('filename')
          })).SerializeToString()

  example_decoder = tf_example_decoder.TfExampleDecoder()
  tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

  with self.test_session() as sess:
    tensor_dict = sess.run(tensor_dict)
    self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
    self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) bbox_classes_text = ['cat', 'cheetah'] bbox_classes_id = [5, 6] example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/object/class/text': dataset_util.bytes_list_feature(bbox_classes_text), 'image/object/class/label': dataset_util.int64_list_feature(bbox_classes_id), })).SerializeToString() label_map_string = """ item { name:'/m/cat' id:3 display_name:'cat' } item { name:'/m/dog' id:1 display_name:'dog' } """ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') with tf.gfile.Open(label_map_path, 'wb') as f: f.write(label_map_string) example_decoder = tf_example_decoder.TfExampleDecoder( label_map_proto_file=label_map_path) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: sess.run(tf.tables_initializer()) tensor_dict = sess.run(tensor_dict) self.assertAllEqual([3, -1], tensor_dict[fields.InputDataFields.groundtruth_classes])
def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
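# create_tf_example(group, path) above expects each `group` to carry a
# filename plus a per-image DataFrame of box rows. A minimal, hedged sketch
# of the usual companion code -- grouping a CSV of annotations by filename and
# streaming the results into a TFRecordWriter -- is given here. The CSV path,
# record path, and image directory are assumptions, not part of the snippet.
import os
from collections import namedtuple

import pandas as pd
import tensorflow as tf


def split(df, group_key):
    # Bundle every annotation row that shares a filename into one group.
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group_key)
    return [data(filename, gb.get_group(filename)) for filename in gb.groups]


examples = pd.read_csv('annotations/train_labels.csv')  # placeholder path
writer = tf.python_io.TFRecordWriter('train.record')    # placeholder path
for group in split(examples, 'filename'):
    tf_example = create_tf_example(group, 'images')
    writer.write(tf_example.SerializeToString())
writer.close()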
def testInstancesNotAvailableByDefault(self): num_instances = 4 image_height = 5 image_width = 3 # Randomly generate image. image_tensor = np.random.randint( 256, size=(image_height, image_width, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) # Randomly generate instance segmentation masks. instance_masks = ( np.random.randint(2, size=(num_instances, image_height, image_width)).astype(np.float32)) instance_masks_flattened = np.reshape(instance_masks, [-1]) # Randomly generate class labels for each instance. object_classes = np.random.randint( 100, size=(num_instances)).astype(np.int64) example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/object/mask': dataset_util.float_list_feature(instance_masks_flattened), 'image/object/class/label': dataset_util.int64_list_feature(object_classes) })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder() tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) self.assertTrue( fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
def testDecodeMultiClassScores(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) bbox_ymins = [0.0, 4.0] bbox_xmins = [1.0, 5.0] bbox_ymaxs = [2.0, 6.0] bbox_xmaxs = [3.0, 7.0] flattened_multiclass_scores = [100., 50.] + [20., 30.] example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/object/class/multiclass_scores': dataset_util.float_list_feature(flattened_multiclass_scores ), 'image/object/bbox/ymin': dataset_util.float_list_feature(bbox_ymins), 'image/object/bbox/xmin': dataset_util.float_list_feature(bbox_xmins), 'image/object/bbox/ymax': dataset_util.float_list_feature(bbox_ymaxs), 'image/object/bbox/xmax': dataset_util.float_list_feature(bbox_xmaxs), })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder( load_multiclass_scores=True) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: tensor_dict = sess.run(tensor_dict) self.assertAllEqual(flattened_multiclass_scores, tensor_dict[fields.InputDataFields.multiclass_scores])
def create_tf_record(self):
  path = os.path.join(self.get_temp_dir(), 'tfrecord')
  writer = tf.python_io.TFRecordWriter(path)

  image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
  flat_mask = (4 * 5) * [1.0]
  with self.test_session():
    encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
  example = tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/height': dataset_util.int64_feature(4),
      'image/width': dataset_util.int64_feature(5),
      'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
      'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
      'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
      'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
      'image/object/class/label': dataset_util.int64_list_feature([2]),
      'image/object/mask': dataset_util.float_list_feature(flat_mask),
  }))
  writer.write(example.SerializeToString())
  writer.close()

  return path
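# To sanity-check a record produced by create_tf_record, the file can be read
# back outside the decoder. A minimal sketch using the TF1 record iterator;
# the record path below is a placeholder for whatever create_tf_record returned.
import tensorflow as tf

record_path = '/tmp/tfrecord'  # placeholder
for serialized in tf.python_io.tf_record_iterator(record_path):
    example = tf.train.Example.FromString(serialized)
    feature = example.features.feature
    print(feature['image/height'].int64_list.value,
          feature['image/width'].int64_list.value,
          feature['image/object/class/label'].int64_list.value)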
def create_mock_tfrecord():
  pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8), 'RGB')
  # Encode the 1x1 image as PNG bytes in memory.
  image_output_stream = io.BytesIO()
  pil_image.save(image_output_stream, format='png')
  encoded_image = image_output_stream.getvalue()

  feature_map = {
      'test_field':
          dataset_util.float_list_feature([1, 2, 3, 4]),
      standard_fields.TfExampleFields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  tf_example = tf.train.Example(features=tf.train.Features(feature=feature_map))
  with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
    writer.write(tf_example.SerializeToString())
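# create_mock_tfrecord writes to a path returned by get_mock_tfrecord_path(),
# which is not shown in this section. A plausible one-line helper (the
# temp-directory location is an assumption) would be:
import os
import tempfile


def get_mock_tfrecord_path():
  # Hypothetical helper: place the mock record in the system temp directory.
  return os.path.join(tempfile.gettempdir(), 'mock.tfrecord')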
def dict_to_tf_example(filename, mask_path, label_map_dict, img_path): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: filename: name of the image mask_path: String path to PNG encoded mask. label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by filename is not a valid JPEG """ with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) width = np.asarray(image).shape[1] height = np.asarray(image).shape[0] if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() with tf.gfile.GFile(mask_path, 'rb') as fid: encoded_mask_png = fid.read() encoded_png_io = io.BytesIO(encoded_mask_png) mask = PIL.Image.open(encoded_png_io) mask_np = np.asarray(mask.convert('L')) if mask.format != 'PNG': raise ValueError('Mask format not PNG') xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] masks = [] for k in list(mask_pixel.keys()): class_name = k nonbackground_indices_x = np.any(mask_np == mask_pixel[class_name], axis=0) nonbackground_indices_y = np.any(mask_np == mask_pixel[class_name], axis=1) nonzero_x_indices = np.where(nonbackground_indices_x) nonzero_y_indices = np.where(nonbackground_indices_y) if np.asarray(nonzero_x_indices).shape[1] > 0 and np.asarray( nonzero_y_indices).shape[1] > 0: xmin = float(np.min(nonzero_x_indices)) xmax = float(np.max(nonzero_x_indices)) ymin = float(np.min(nonzero_y_indices)) ymax = float(np.max(nonzero_y_indices)) print(filename, 'bounding box for', class_name, xmin, xmax, ymin, ymax) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) mask_remapped = (mask_np == mask_pixel[class_name]).astype( np.uint8) masks.append(mask_remapped) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } encoded_mask_png_list = [] for mask in masks: img = PIL.Image.fromarray(mask) output = io.BytesIO() img.save(output, format='PNG') encoded_mask_png_list.append(output.getvalue()) feature_dict['image/object/mask'] = ( 
dataset_util.bytes_list_feature(encoded_mask_png_list)) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return example
def create_tf_record(output_filename, num_shards, examples): with contextlib2.ExitStack() as tf_record_close_stack: output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords( tf_record_close_stack, output_filename, num_shards) for idx, example in enumerate(examples): img_path = os.path.join(read_bucket, example) if not os.path.isfile(img_path): continue with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width, height = image.size xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] # 'coke', 'pepsi', 'coke'... classes = [] # 1, 2, 1... difficult_obj = [] truncated = [] poses = [] for annotation in annotations[example]: if 'x' in annotation and 'x2' in annotation and 'y' in annotation and 'y2' in annotation: xmins.append(annotation['x']) xmaxs.append(annotation['x2']) ymins.append(annotation['y']) ymaxs.append(annotation['y2']) classes_text.append(annotation['label'].encode('utf8')) classes.append(1) # temporary, I need to assign labels to actual ids difficult_obj.append(0) truncated.append(0) poses.append(''.encode('utf8')) try: feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(example.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(example.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses) } tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) if tf_example: shard_idx = idx % num_shards output_tfrecords[shard_idx].write(tf_example.SerializeToString()) except ValueError: print('Invalid example, ignoring.')
def create_tf_example(img_filename, path): with tf.gfile.GFile(os.path.join(path, '{}'.format(img_filename)), 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = Image.open(encoded_jpg_io) width, height = image.size match = re.match(r'(.*)\.jpg', img_filename) filename = match.group(1) img_filename = img_filename.encode('utf8') img_format = b'jpg' xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] label_filename = FLAGS.txt_input + '/' + filename + '.txt' file = open(label_filename, 'r') labels = file.readline() match = re.match(r'(.*)\s(.*)\s(.*)\s(.*)\s(.*)\s', labels) label = match.group(1) xmin = float(match.group(2)) ymin = float(match.group(3)) xmax = float(match.group(4)) ymax = float(match.group(5)) xmins.append(xmin / width) xmaxs.append(xmax / width) ymins.append(ymin / height) ymaxs.append(ymax / height) classes_text.append(label.encode('utf8')) classes.append(class_text_to_int(label)) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(img_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
def dict_to_tf_example(split_data_dir, name, ignore_difficult_instances=False, image_subdirectory='JPEGImages'): """Convert Munich datset to tf.Example proto. Convert the image "name" of the Munich dataset (train/val) into record file Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ IMG_TYPE = '.jpg' GT_EXT = '_gt.txt' file_name = name + IMG_TYPE img_path = os.path.join(split_data_dir, file_name) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() # Get the image shape information tmp_img = cv2.imread(img_path) height, width, depth = tmp_img.shape xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] # Get the ground truth bounding box file gt_filename = os.path.join(split_data_dir, name + GT_EXT) with open(gt_filename) as f: lines = f.read().splitlines() for line in lines: difficult_obj.append(int(0)) line_float = [float(item) for item in line.split(',')] label = int(line_float[-1]) xmin.append(line_float[0] / width) # xmin ymin.append(line_float[1] / height) # ymin xmax.append(line_float[2] / width) # xmax ymax.append(line_float[3] / height) # ymax classes_text.append(OBJ_NAME[label].encode('utf8')) classes.append(USE_LABEL[label]) # Add zeros for truncated (not being used for Munich dataset) truncated.append(0) poses.append('none'.encode('utf8')) example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(file_name.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(file_name.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), })) return example
def dict_to_tf_example(data, images_dir, label_map_dict, ignore_difficult_instances=False, keep_empty_image=False): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) dataset_directory: Path to root directory holding PASCAL dataset label_map_dict: A map from string label names to integers ids. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). image_subdirectory: String specifying subdirectory within the PASCAL dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ full_path = os.path.join(images_dir, data['filename']) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format == 'PNG' or image.size[0] > 1920: image.thumbnail((1920, 1920), PIL.Image.ANTIALIAS) temp_file = io.BytesIO() image.save(temp_file, format="jpeg") temp_file.seek(0) encoded_jpg = temp_file.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] if 'object' in data: for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue if obj['name'] in custom_label_map: obj['name'] = custom_label_map.get(obj['name']) if not obj['name']: continue elif FLAGS.skip_category and obj['name'] in set(FLAGS.skip_category.split(',')): continue difficult_obj.append(int(difficult)) obj['name'] = obj['name'].lower() xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) label_count[obj['name']] = label_count.get(obj['name'], 0) + 1 if len(data['object']) > 0: label_count['total'] += 1 if len(classes) == 0 and not keep_empty_image: return example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), })) return example
def create_tf_example(group, path, class_dict): import tensorflow as tf from object_detection.utils import dataset_util with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = Image.open(encoded_jpg_io) width, height = image.size filename = group.filename.encode('utf8') image_format = b'jpg' xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] for index, row in group.object.iterrows(): if set(['xmin_rel', 'xmax_rel', 'ymin_rel', 'ymax_rel']).issubset(set(row.index)): xmin = row['xmin_rel'] xmax = row['xmax_rel'] ymin = row['ymin_rel'] ymax = row['ymax_rel'] elif set(['xmin', 'xmax', 'ymin', 'ymax']).issubset(set(row.index)): xmin = row['xmin'] / width xmax = row['xmax'] / width ymin = row['ymin'] / height ymax = row['ymax'] / height xmins.append(xmin) xmaxs.append(xmax) ymins.append(ymin) ymaxs.append(ymax) classes_text.append(row['class'].encode('utf8')) classes.append(class_dict[row['class']]) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
img_label = cv2.imread(label)
img_mask = image2label(img_label)
encoded_label = img_mask.astype(np.uint8).tobytes()
height, width = img_label.shape[0], img_label.shape[1]
# print('image height: %d' % height)
# print('image width: %d' % width)
if height < vgg_16.default_image_size or width < vgg_16.default_image_size:
    # Skip images that are too small for the final random crop.
    print('image smaller than default_image_size %d, skipping'
          % vgg_16.default_image_size)
    return None

feature_dict = {
    'image/height': dataset_util.int64_feature(height),
    'image/width': dataset_util.int64_feature(width),
    'image/filename': dataset_util.bytes_feature(data.encode('utf8')),
    'image/encoded': dataset_util.bytes_feature(encoded_data),
    'image/label': dataset_util.bytes_feature(encoded_label),
    'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
}
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return example
def prepare_example(image_path, annotations, label_map_dict): """Converts a dictionary with annotations for an image to tf.Example proto. Args: image_path: The complete path to image. annotations: A dictionary representing the annotation of a single object that appears in the image. label_map_dict: A map from string label names to integer ids. Returns: example: The converted tf.Example. """ with tf.gfile.GFile(image_path, 'rb') as fid: encoded_png = fid.read() encoded_png_io = io.BytesIO(encoded_png) image = pil.open(encoded_png_io) image = np.asarray(image) key = hashlib.sha256(encoded_png).hexdigest() width = int(image.shape[1]) height = int(image.shape[0]) xmin_norm = annotations['2d_bbox_left'] / float(width) ymin_norm = annotations['2d_bbox_top'] / float(height) xmax_norm = annotations['2d_bbox_right'] / float(width) ymax_norm = annotations['2d_bbox_bottom'] / float(height) difficult_obj = [0]*len(xmin_norm) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_png), 'image/format': dataset_util.bytes_feature('png'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm), 'image/object/class/text': dataset_util.bytes_list_feature( [x.encode('utf8') for x in annotations['type']]), 'image/object/class/label': dataset_util.int64_list_feature( [label_map_dict[x] for x in annotations['type']]), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.float_list_feature( annotations['truncated']), 'image/object/alpha': dataset_util.float_list_feature( annotations['alpha']), 'image/object/3d_bbox/height': dataset_util.float_list_feature( annotations['3d_bbox_height']), 'image/object/3d_bbox/width': dataset_util.float_list_feature( annotations['3d_bbox_width']), 'image/object/3d_bbox/length': dataset_util.float_list_feature( annotations['3d_bbox_length']), 'image/object/3d_bbox/x': dataset_util.float_list_feature( annotations['3d_bbox_x']), 'image/object/3d_bbox/y': dataset_util.float_list_feature( annotations['3d_bbox_y']), 'image/object/3d_bbox/z': dataset_util.float_list_feature( annotations['3d_bbox_z']), 'image/object/3d_bbox/rot_y': dataset_util.float_list_feature( annotations['3d_bbox_rot_y']), })) return example
def dict_to_tf_example(data, dataset_directory, label_map_dict, ignore_difficult_instances=False, image_subdirectory='JPEGImages'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) dataset_directory: Path to root directory holding PASCAL dataset label_map_dict: A map from string label names to integers ids. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). image_subdirectory: String specifying subdirectory within the PASCAL dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(data['folder'], data['filename']+'.jpg') #os.path.join(data['folder'], image_subdirectory, data['filename']) full_path = os.path.join(dataset_directory, img_path) #print("full_path", full_path) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] if 'object' in data: for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) brands[obj['name']]=brands[obj['name']]+1 xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) if (obj['name'] in label_map_dict.keys()): classes.append(label_map_dict[obj['name']]) else: print("WARNING",full_path) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) examples_list_number_classes_text[os.path.splitext(full_path)[0]]=len(classes_text) examples_list_number_classes[os.path.splitext(full_path)[0]]=len(classes) logging.info(xmin,ymin,xmax,ymax,classes_text,classes,poses,data['folder'], data['filename']) #print(xmin,ymin,xmax,ymax,classes_text,classes,poses,data['folder'], data['filename']) #print(xmin,ymin,xmax,ymax) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( str.encode(data['folder']+'/'+data['filename'],'utf8')), 'image/source_id': dataset_util.bytes_feature( str.encode(data['folder']+'/'+data['filename'],'utf8')), 'image/key/sha256': dataset_util.bytes_feature(str.encode(key,'utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(str.encode('jpeg','utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': 
dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), })) if (examples_list_number_classes_text[os.path.splitext(full_path)[0]]!=examples_list_number_classes[os.path.splitext(full_path)[0]]): print(full_path,example) if (len(classes_text) ==0): #logging.info(example) print(full_path,example) print(full_path,examples_list_number_classes_text[os.path.splitext(full_path)[0]],examples_list_number_classes[os.path.splitext(full_path)[0]]) # extract pre-trained face detector #face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_alt.xml') # load color (BGR) image img = cv2.imread(full_path) # convert BGR image to grayscale #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) ## find faces in image #faces = face_cascade.detectMultiScale(gray) # print number of faces detected in the image #print('Number of faces detected:', len(faces)) ## get bounding box for each detected face #for (x,y,w,h) in (xmin,ymin,xmax,ymax): # # add bounding box to color image #cv2.rectangle(img,(int(xmin[0]),int(ymin[0])),(int(xmax[0]),int(ymax[0])),(255,0,0),2) cv2.rectangle(img,(int(xmin[0]*width),int(ymin[0])),(int(xmax[0]),int(ymax[0])),(255,0,0),2) # convert BGR image to RGB for plotting cv_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #cv2.imshow("Image",cv_rgb) # display the image, along with bounding box plt.imshow(cv_rgb) plt.show() return example
def create_tf_example(
        example,
        size=None,
        label_map_dict=label_map_util.get_label_map_dict(LABEL_MAP_PATH)):
    img_file, boxes = example['img_file'], example['boxes']
    img = PIL.Image.open(img_file)
    width, height = img.size
    img_class_text = 'hand'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for box in boxes:
        if box is not None:
            xmins.append(float(box['x_min'] + 1) / width)
            xmaxs.append(float(box['x_max'] + 1) / width)
            ymins.append(float(box['y_min'] + 1) / height)
            ymaxs.append(float(box['y_max'] + 1) / height)
            classes_text.append(img_class_text.encode('utf-8'))
            classes.append(label_map_dict[img_class_text])
    if len(xmins) == 0:
        return None

    if size is not None:
        width, height = size
        assert (width > 0 and height > 0)
        img = img.resize((width, height))

    # Re-encode the (possibly resized) image as JPEG.
    buf = io.BytesIO()
    img.save(buf, "JPEG")
    img_encoded = buf.getvalue()
    img_encoded_format = b'jpeg'

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(img_file.encode('utf-8')),
            'image/source_id': dataset_util.bytes_feature(img_file.encode('utf-8')),
            'image/encoded': dataset_util.bytes_feature(img_encoded),
            'image/format': dataset_util.bytes_feature(img_encoded_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label': dataset_util.int64_list_feature(classes)
        }))
    return tf_example
def dict_to_tf_example(data_dict, label_map_dict, data_dir): """ Creates training example object (see tf.train.Example) Arguments: data_dict dictionary created from a PASCAL VOC annotation file label_map_dict dictionary containing class_id to class_name mappings Returns: A tf.train.Example object containing bounding box annotation data as well as encoded JPEG data. """ # Extract information from dictionary image_filename = data_dict['filename'] width = int(data_dict['size']['width']) height = int(data_dict['size']['height']) # Get JPEG data as encoded bytes image_path = os.path.join(data_dir, image_filename) encoded_jpg = get_encoded_jpeg(image_path) # Create array of class labels for the annotations (i.e., bounding boxes) # associated with this training example xmin = [] ymin = [] xmax = [] ymax = [] class_ids = [] class_names = [] if 'object' in data_dict: for obj in data_dict['object']: xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) class_ids.append(label_map_dict[obj['name']]) class_names.append(obj['name'].encode('utf8')) key = hashlib.sha256(encoded_jpg).hexdigest() # Create training example example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(image_filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(image_filename.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(class_names), 'image/object/class/label': dataset_util.int64_list_feature(class_ids) })) return example
def image_to_tf_data(img_path, mask_path, xml_path, label_map_dict, filename):
  """Convert an image and its annotations to a tf.train.Example proto.

  Note: if an image contains more than one object from the same class, XML
  files with bounding box annotations need to be provided.

  Args:
    img_path: String specifying subdirectory within the dataset directory
      holding the actual image data.
    mask_path: String path to PNG encoded mask.
    xml_path: String path to XML file holding bounding box annotations.
    label_map_dict: A map from string label names to integer ids.
    filename: Name of the image.

  Returns:
    tf_data: The converted tf.train.Example, or None if the XML file is
      missing.

  Raises:
    ValueError: if the image pointed to by filename is not a valid JPEG
  """
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  width = np.asarray(image).shape[1]
  height = np.asarray(image).shape[0]
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_mask_png)
  mask = PIL.Image.open(encoded_png_io)
  mask_np = np.asarray(mask.convert('L'))
  if mask.format != 'PNG':
    raise ValueError('Mask format not PNG')

  classes = []
  classes_text = []
  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  encoded_mask_png_list = []
  if FLAGS.bboxes_provided:
    if not os.path.exists(xml_path):
      logging.warning('Could not find %s, ignoring example.', xml_path)
      return
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    if 'object' in data:
      for obj in data['object']:
        class_name = obj['name']
        pixel_val = int(label_map_dict[class_name][1])
        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])
        print(filename, 'bounding box for', class_name, xmin, xmax, ymin, ymax)
        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name][0])
        # Keep only the mask pixels inside this box, then binarize on the
        # class pixel value.
        mask_np_black = mask_np * 0
        mask_np_black[int(ymin):int(ymax), int(xmin):int(xmax)] = mask_np[
            int(ymin):int(ymax), int(xmin):int(xmax)]
        mask_remapped = (mask_np_black == pixel_val).astype(np.uint8)
        img = PIL.Image.fromarray(mask_remapped)
        output = io.BytesIO()
        img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())
  else:
    # No XML: derive one box per class directly from the mask. Do not reuse
    # `key` as the loop variable here, or the sha256 feature below would be
    # overwritten with a class name.
    for class_name in label_map_dict:
      pixel_val = int(label_map_dict[class_name][1])
      nonbackground_indices_x = np.any(mask_np == pixel_val, axis=0)
      nonbackground_indices_y = np.any(mask_np == pixel_val, axis=1)
      nonzero_x_indices = np.where(nonbackground_indices_x)
      nonzero_y_indices = np.where(nonbackground_indices_y)
      if np.asarray(nonzero_x_indices).shape[1] > 0 and np.asarray(
          nonzero_y_indices).shape[1] > 0:
        xmin = float(np.min(nonzero_x_indices))
        xmax = float(np.max(nonzero_x_indices))
        ymin = float(np.min(nonzero_y_indices))
        ymax = float(np.max(nonzero_y_indices))
        print(filename, 'bounding box for', class_name, xmin, xmax, ymin, ymax)
        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name][0])
        mask_remapped = (mask_np == pixel_val).astype(np.uint8)
        img = PIL.Image.fromarray(mask_remapped)
        output = io.BytesIO()
        img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/mask': dataset_util.bytes_list_feature(encoded_mask_png_list)
  }
  tf_data = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return tf_data
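# --- Usage sketch (not part of the original converter) ---
# image_to_tf_data() above indexes label_map_dict[class_name][0] for the class
# id and [1] for the mask pixel value, so it expects (id, pixel_value) pairs
# rather than the plain name -> id maps used elsewhere in this file. A minimal,
# hypothetical example of driving it under that assumption; all paths and
# values are placeholders.
label_map_dict = {
    'cat': (1, 100),  # class id 1, mask pixels equal to 100
    'dog': (2, 200),  # class id 2, mask pixels equal to 200
}
tf_data = image_to_tf_data(
    img_path='images/0001.jpg',
    mask_path='masks/0001.png',
    xml_path='annotations/0001.xml',
    label_map_dict=label_map_dict,
    filename='0001.jpg')
if tf_data is not None:  # the function returns None when the XML is missing
  with tf.python_io.TFRecordWriter('train.record') as writer:
    writer.write(tf_data.SerializeToString())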
ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) class_name = get_class_name_from_filename(data['filename']) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) #if not faces_only: # mask_remapped = (mask_np != 2).astype(np.uint8) # masks.append(mask_remapped) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), }
def dict_to_tf_example(data, dataset_directory, label_map_dict, ignore_difficult_instances=False, image_subdirectory='JPEGImages'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) dataset_directory: Path to root directory holding PASCAL dataset label_map_dict: A map from string label names to integers ids. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). image_subdirectory: String specifying subdirectory within the PASCAL dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(data['folder'], image_subdirectory, data['filename']) full_path = os.path.join(dataset_directory, img_path) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), })) return example
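# --- Usage sketch (not part of the original converter) ---
# The PASCAL-style dict_to_tf_example() above is normally driven by a loop that
# parses one XML annotation per image and appends the serialized example to a
# TFRecord file. The directory layout and label map path below are
# placeholders.
from lxml import etree

label_map_dict = label_map_util.get_label_map_dict('data/pascal_label_map.pbtxt')
with tf.python_io.TFRecordWriter('pascal_train.record') as writer:
  for xml_path in tf.gfile.Glob('VOCdevkit/VOC2012/Annotations/*.xml'):
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml = etree.fromstring(fid.read())
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    example = dict_to_tf_example(data, 'VOCdevkit', label_map_dict)
    writer.write(example.SerializeToString())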
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map, encoded_image): """Populates a TF Example message with image annotations from a data frame. Args: annotations_data_frame: Data frame containing the annotations for a single image. label_map: String to integer label map. encoded_image: The encoded image string Returns: The populated TF Example, if the label of at least one object is present in label_map. Otherwise, returns None. """ filtered_data_frame = annotations_data_frame[ annotations_data_frame.LabelName.isin(label_map)] image_id = annotations_data_frame.ImageID.iloc[0] feature_map = { standard_fields.TfExampleFields.object_bbox_ymin: dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()), standard_fields.TfExampleFields.object_bbox_xmin: dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()), standard_fields.TfExampleFields.object_bbox_ymax: dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()), standard_fields.TfExampleFields.object_bbox_xmax: dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()), standard_fields.TfExampleFields.object_class_text: dataset_util.bytes_list_feature( filtered_data_frame.LabelName.as_matrix()), standard_fields.TfExampleFields.object_class_label: dataset_util.int64_list_feature( filtered_data_frame.LabelName.map( lambda x: label_map[x]).as_matrix()), standard_fields.TfExampleFields.filename: dataset_util.bytes_feature('{}.jpg'.format(image_id)), standard_fields.TfExampleFields.source_id: dataset_util.bytes_feature(image_id), standard_fields.TfExampleFields.image_encoded: dataset_util.bytes_feature(encoded_image), } if 'IsGroupOf' in filtered_data_frame.columns: feature_map[standard_fields.TfExampleFields. object_group_of] = dataset_util.int64_list_feature( filtered_data_frame.IsGroupOf.as_matrix().astype(int)) if 'IsOccluded' in filtered_data_frame.columns: feature_map[standard_fields.TfExampleFields. object_occluded] = dataset_util.int64_list_feature( filtered_data_frame.IsOccluded.as_matrix().astype(int)) if 'IsTruncated' in filtered_data_frame.columns: feature_map[standard_fields.TfExampleFields. object_truncated] = dataset_util.int64_list_feature( filtered_data_frame.IsTruncated.as_matrix().astype( int)) if 'IsDepiction' in filtered_data_frame.columns: feature_map[standard_fields.TfExampleFields. object_depiction] = dataset_util.int64_list_feature( filtered_data_frame.IsDepiction.as_matrix().astype( int)) return tf.train.Example(features=tf.train.Features(feature=feature_map))
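# --- Usage sketch (not part of the original function) ---
# tf_example_from_annotations_data_frame() above expects the annotation rows of
# a single image, so a typical driver groups an Open Images style CSV by
# ImageID. The CSV/image paths and the label_map contents below are
# placeholders.
import pandas as pd

label_map = {'/m/01g317': 1}  # hypothetical LabelName -> integer id map
annotations = pd.read_csv('train-annotations-bbox.csv')
with tf.python_io.TFRecordWriter('oid_train.record') as writer:
  for image_id, image_annotations in annotations.groupby('ImageID'):
    with tf.gfile.GFile('images/{}.jpg'.format(image_id), 'rb') as fid:
      encoded_image = fid.read()
    tf_example = tf_example_from_annotations_data_frame(
        image_annotations, label_map, encoded_image)
    if tf_example is not None:
      writer.write(tf_example.SerializeToString())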
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. Returns: example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] num_annotations_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_id = int(object_annotations['category_id']) category_ids.append(category_id) category_names.append(category_index[category_id]['name'].encode('utf8')) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects(object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/label': dataset_util.int64_list_feature(category_ids), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return key, example, num_annotations_skipped
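# --- Usage sketch (not part of the original converter) ---
# The COCO create_tf_example() above returns (key, example, num_skipped) and is
# usually fed from an instances JSON whose annotations have been grouped by
# image id. File and directory names below are placeholders.
import json

with tf.gfile.GFile('annotations/instances_train2017.json', 'r') as fid:
  groundtruth = json.load(fid)
category_index = label_map_util.create_category_index(groundtruth['categories'])

# Group annotations by the image they belong to.
annotations_index = {}
for annotation in groundtruth['annotations']:
  annotations_index.setdefault(annotation['image_id'], []).append(annotation)

num_skipped_total = 0
with tf.python_io.TFRecordWriter('coco_train.record') as writer:
  for image in groundtruth['images']:
    _, example, num_skipped = create_tf_example(
        image, annotations_index.get(image['id'], []), 'train2017',
        category_index, include_masks=False)
    num_skipped_total += num_skipped
    writer.write(example.SerializeToString())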
def dict_to_tf_example(data, label_map_dict, image_subdirectory, ignore_difficult_instances=False): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example
def dict_to_tf_example(data, mask_path, label_map_dict, image_subdirectory, ignore_difficult_instances=False, faces_only=True, mask_type='png'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) mask_path: String path to PNG encoded mask. label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). faces_only: If True, generates bounding boxes for cloud_derby. Otherwise generates bounding boxes (as well as segmentations for full cloud_derby). mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to smaller file sizes. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] masks = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) class_name = get_class_name_from_filename(data['filename']) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } if not faces_only: if mask_type == 'numerical': mask_stack = np.stack(masks).astype(np.float32) masks_flattened = np.reshape(mask_stack, [-1]) feature_dict['image/object/mask'] = ( 
dataset_util.float_list_feature(masks_flattened.tolist())) elif mask_type == 'png': encoded_mask_png_list = [] for mask in masks: img = PIL.Image.fromarray(mask) output = io.BytesIO() img.save(output, format='PNG') encoded_mask_png_list.append(output.getvalue()) feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png_list)) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return example
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. Returns: example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] num_annotations_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_id = int(object_annotations['category_id']) category_ids.append(category_id) category_names.append( category_index[category_id]['name'].encode('utf8')) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects( object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(category_names), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return key, example, num_annotations_skipped
def dict_to_tf_example(labels_corners, labels_center, labels_data, params, label_map_dict, image_dir, image_prefix, image_prev_prefix): width = round(params['pointcloud_grid_map_interface']['grids']['cartesian'] ['range']['y'] / params['pointcloud_grid_map_interface'] ['grids']['cartesian']['resolution']['y']) height = round( params['pointcloud_grid_map_interface']['grids']['cartesian']['range'] ['x'] / params['pointcloud_grid_map_interface']['grids']['cartesian'] ['resolution']['x']) xmin = [] ymin = [] xmax = [] ymax = [] x_c = [] y_c = [] w = [] h = [] angle = [] sin_angle = [] cos_angle = [] classes = [] classes_text = [] for idx, label_corner in enumerate(labels_corners): x_min = min(label_corner[0]) / width y_min = min(label_corner[1]) / height x_max = max(label_corner[0]) / width y_max = max(label_corner[1]) / height num_detections = _count_number_detections(image_dir, image_prefix, (x_min, y_min, x_max, y_max), params) if num_detections == 0: continue xmin.append(x_min) ymin.append(y_min) xmax.append(x_max) ymax.append(y_max) if (x_min >= 1) or (y_min >= 1) or (x_max >= 1) or (y_max >= 1): print(x_min, y_min, x_max, y_max) raise ValueError('Box Parameters greather than 1.0') if (x_min <= 0) or (y_min <= 0) or (x_max <= 0) or (y_max <= 0): raise ValueError('Box Parameters less than 0.0') x_c.append(labels_center[idx][0]) y_c.append(labels_center[idx][1]) angle_rad = labels_data[idx].rz angle.append(angle_rad) sin_angle.append(math.sin(2 * angle_rad)) cos_angle.append(math.cos(2 * angle_rad)) vec_s_x = math.cos(angle_rad) vec_s_y = math.sin(angle_rad) w_p = labels_data[idx].w / params['pointcloud_grid_map_interface'][ 'grids']['cartesian']['resolution']['y'] w_p_s = w_p * math.sqrt(vec_s_x * vec_s_x / (width * width) + vec_s_y * vec_s_y / (height * height)) w.append(w_p_s) l_p = labels_data[idx].l / params['pointcloud_grid_map_interface'][ 'grids']['cartesian']['resolution']['x'] l_p_s = l_p * math.sqrt(vec_s_x * vec_s_x / (height * height) + vec_s_y * vec_s_y / (width * width)) h.append(l_p_s) class_name = labels_data[idx].type classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) return tf.train.Example(features=tf.train.Features( feature={ 'id': dataset_util.bytes_feature(image_prefix.encode('utf8')), 'image/format': dataset_util.bytes_feature('png'.encode('utf8')), 'layers/height': dataset_util.int64_feature(height), 'layers/width': dataset_util.int64_feature(width), 'layers/detections/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prefix, 'detections_cartesian')), 'layers/observations/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prefix, 'observations_cartesian')), 'layers/decay_rate/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prefix, 'decay_rate_cartesian')), 'layers/intensity/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prefix, 'intensity_cartesian')), 'layers/zmin/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prefix, 'z_min_detections_cartesian')), 'layers/zmax/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prefix, 'z_max_detections_cartesian')), 'layers/occlusions/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prefix, 'z_max_occlusions_cartesian')), 'layers_prev/detections/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prev_prefix, 'detections_cartesian')), 'layers_prev/observations/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prev_prefix, 
'observations_cartesian')), 'layers_prev/decay_rate/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prev_prefix, 'decay_rate_cartesian')), 'layers_prev/intensity/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prev_prefix, 'intensity_cartesian')), 'layers_prev/zmin/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prev_prefix, 'z_min_detections_cartesian')), 'layers_prev/zmax/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prev_prefix, 'z_max_detections_cartesian')), 'layers_prev/occlusions/encoded': dataset_util.bytes_feature( _readImage(image_dir, image_prev_prefix, 'z_max_occlusions_cartesian')), 'boxes/aligned/x_min': dataset_util.float_list_feature(xmin), 'boxes/aligned/x_max': dataset_util.float_list_feature(xmax), 'boxes/aligned/y_min': dataset_util.float_list_feature(ymin), 'boxes/aligned/y_max': dataset_util.float_list_feature(ymax), 'boxes/inclined/x_c': dataset_util.float_list_feature(x_c), 'boxes/inclined/y_c': dataset_util.float_list_feature(y_c), 'boxes/inclined/w': dataset_util.float_list_feature(w), 'boxes/inclined/h': dataset_util.float_list_feature(h), 'boxes/inclined/angle': dataset_util.float_list_feature(angle), 'boxes/inclined/sin_angle': dataset_util.float_list_feature(sin_angle), 'boxes/inclined/cos_angle': dataset_util.float_list_feature(cos_angle), 'boxes/class/text': dataset_util.bytes_list_feature(classes_text), 'boxes/class/label': dataset_util.int64_list_feature(classes), }))
def CreateTFExample(img_path, img_name, annotation): """ create tf record example this function runs once per image args: img_path: image path img_name: image name annotation: annotation dictionary for current image """ #img_name=annotation['name'] # for viewnyx part 2 with tf.gfile.GFile(os.path.join(img_path, img_name), 'rb') as fid: encoded_jpg = fid.read() img_format = img_name.split('.')[-1] width = annotation['width'] height = annotation['height'] xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] for bbx in annotation['annotations']: xmins.append(bbx['x'] / width) xmaxs.append((bbx['x'] + bbx['width']) / width) ymins.append(bbx['y'] / height) ymaxs.append((bbx['y'] + bbx['height']) / height) classes_text.append(bbx['label'].lower().encode('utf8')) classes.append(GetClassID(bbx['label'].lower())) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(img_name.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(img_name.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(img_format.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
def create_tf_example(name): # TODO(user): Populate the following variables from your example. b_image = util.encode_image_png( os.path.join(source_img_dir, name) + '.' + fileformat) label_objects = util.parse_dota_poly( os.path.join(source_label_dir, name) + '.txt') width, height = Image.open( os.path.join(source_img_dir, name) + '.' + fileformat).size # Image width, height filename = name.encode( ) # Filename of the image. Empty if image is not from file encoded_image_data = b_image # Encoded image bytes if fileformat == 'jpg': image_format = b'jpeg' else: image_format = b'png' # b'jpeg' or b'png' xmins = [ ] # List of normalized left x coordinates in bounding box (1 per box) xmaxs = [ ] # List of normalized right x coordinates in bounding box (1 per box) ymins = [ ] # List of normalized top y coordinates in bounding box (1 per box) ymaxs = [ ] # List of normalized bottom y coordinates in bounding box (1 per box) classes_text = [] # List of string class name of bounding box (1 per box) classes = [] # List of integer class id of bounding box (1 per box) for obj in label_objects: poly = obj['poly'] xmin, xmax, ymin, ymax = util.dots4ToRec4(poly) xmins.append(xmin) xmaxs.append(xmax) ymins.append(ymin) ymaxs.append(ymax) classes_text.append(obj['name'].encode()) classes.append(util.dota_10.index(obj['name'])) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_image_data), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
def dict_to_tf_example(data, image_dir, label_map_dict): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Arguments: data: dict holding XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) image_dir: Path to image directory. label_map_dict: A map from string label names to integers ids. Returns: example: The converted tf.Example. """ full_path = os.path.join(image_dir, data['filename']) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] try: for obj in data['object']: xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) except KeyError: print(data['filename'] + ' without objects!') difficult_obj = [0] * len(classes) example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj) })) return example
def create_tf_example(example): image_path = os.getcwd() + '/' + FLAGS.images_dir + example labels_path = os.getcwd() + '/' + FLAGS.labels_dir + os.path.splitext( example)[0] + '.xml' with tf.gfile.GFile(image_path, 'rb') as fid: encoded_img = fid.read() encoded_io = io.BytesIO(encoded_img) image = PIL.Image.open(encoded_io) key = hashlib.sha256(encoded_img).hexdigest() with tf.gfile.GFile(labels_path, 'r') as fid: xml_str = fid.read() xml = etree.fromstring(xml_str) data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] # Read the image # img = Image.open(image_path) width = int(data['size']['width']) height = int(data['size']['height']) image_format = 'png' # Read the label XML # tree = ET.parse(labels_path) # root = tree.getroot() xmins = [] ymins = [] xmaxs = [] ymaxs = [] for obj in data['object']: xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) classes_text = ['target'.encode('utf8')] classes = [1] print(xmins) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_img), 'image/format': dataset_util.bytes_feature(image_format.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
def dict_to_tf_example(data, label_map_dict, img_path): with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = image.width height = image.height xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] for shape in data['Layers']['Layer']['Shapes']['Shape']: text = shape['BlockText']['Text'].text if not (text.startswith('Panel') or text.startswith('panel')): continue attrib = shape['Data']['Extent'].attrib x = float(attrib['X']) y = float(attrib['Y']) w = float(attrib['Width']) h = float(attrib['Height']) xmin = x xmax = x + w ymin = y ymax = y + h xmin /= width ymin /= height xmax /= width ymax /= height if xmin < 0 or ymin < 0 or xmax > 1.01 or ymax > 1.01: print(img_path) xmins.append(xmin) ymins.append(ymin) xmaxs.append(xmax) ymaxs.append(ymax) class_name = 'Panel' classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(img_path.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(img_path.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), } example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example
def dict_to_tf_example(data, dataset_directory, label_map_dict, categories, image_subdirectory='JPEGImages', flip=False, eval=False): full_path = get_image_full_path(dataset_directory, image_subdirectory, data['filename']) if eval: cop = 'data/inference/' + dataset_directory.split('/')[-2] + '-' + data['filename'] + '.' + full_path.split('.')[-1] copyfile(full_path, cop) encoded_jpg = preprocess_image(full_path, horizontal_flip=flip) width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] if 'object' not in data.keys(): print('No label detected in the xml format') else: for obj in data['object']: if obj['name'] in categories: if flip: print('flip') c = str(get_horizontal_flipped_index(int(obj['name']))) xmin.append(1.0 - float(obj['bndbox']['xmax']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(1.0 - float(obj['bndbox']['xmin']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(c.encode('utf8')) classes.append(label_map_dict[c]) else: c = obj['name'] xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) if max([float(obj['bndbox']['xmin']) / width, float(obj['bndbox']['ymin']) / height, float(obj['bndbox']['xmax']) / width, float(obj['bndbox']['ymax']) / height]) > 1.0: print('error') raise Exception('oops') classes_text.append(c.encode('utf8')) classes.append(label_map_dict[c]) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return example
def dict_to_tf_example(data, mask_path, label_map_dict, image_subdirectory, ignore_difficult_instances=False, faces_only=True, mask_type='png'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) mask_path: String path to PNG encoded mask. label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). faces_only: If True, generates bounding boxes for pet faces. Otherwise generates bounding boxes (as well as segmentations for full pet bodies). mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to smaller file sizes. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() with tf.gfile.GFile(mask_path, 'rb') as fid: encoded_mask_png = fid.read() encoded_png_io = io.BytesIO(encoded_mask_png) mask = PIL.Image.open(encoded_png_io) if mask.format != 'PNG': raise ValueError('Mask format not PNG') mask_np = np.asarray(mask) nonbackground_indices_x = np.any(mask_np != 2, axis=0) nonbackground_indices_y = np.any(mask_np != 2, axis=1) nonzero_x_indices = np.where(nonbackground_indices_x) nonzero_y_indices = np.where(nonbackground_indices_y) width = int(data['size']['width']) height = int(data['size']['height']) xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] masks = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) if faces_only: xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) else: xmin = float(np.min(nonzero_x_indices)) xmax = float(np.max(nonzero_x_indices)) ymin = float(np.min(nonzero_y_indices)) ymax = float(np.max(nonzero_y_indices)) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) class_name = get_class_name_from_filename(data['filename']) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) if not faces_only: mask_remapped = (mask_np != 2).astype(np.uint8) masks.append(mask_remapped) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': 
dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } if not faces_only: if mask_type == 'numerical': mask_stack = np.stack(masks).astype(np.float32) masks_flattened = np.reshape(mask_stack, [-1]) feature_dict['image/object/mask'] = ( dataset_util.float_list_feature(masks_flattened.tolist())) elif mask_type == 'png': encoded_mask_png_list = [] for mask in masks: img = PIL.Image.fromarray(mask) output = io.BytesIO() img.save(output, format='PNG') encoded_mask_png_list.append(output.getvalue()) feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png_list)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example
def create_tf_detection_example(group, path): try: with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid: encoded_jpg = fid.read() except: traceback.print_exc() print('error in opening: ' + os.path.join(path, '{}'.format(group.filename))) return None filename = group.filename.encode('utf8') image_format = b'jpg' xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] # im = Image.open(os.path.join(path, '{}'.format(group.filename))) for index, row in group.object.iterrows(): # if row['class'] == 2: # continue if max(int(row['lt']), int(row['lb'])) >= int(row['width']) or max(int(row['tl']), int(row['tr'])) >= int( row['height']) or min(int(row['rt']), int(row['rb'])) <= 0 or min(int(row['br']), int(row['bl'])) <= 0: continue xmin = min(float(row['lt']), float(row['lb'])) xmax = max(float(row['rt']), float(row['rb'])) ymin = min(float(row['tl']), float(row['tr'])) ymax = max(float(row['bl']), float(row['br'])) xmin += -6 # random.randint(-5, 2) xmax += 6 # random.randint(2, 5) ymin += -5 # random.randint(-5, 2) ymax += 5 # random.randint(2, 5) xmin = max(0, xmin) xmax = min(xmax, row['width']) ymin = max(0, ymin) ymax = min(ymax, row['height']) # d_im = im.crop((xmin, ymin, xmax, ymax)) # d_im.show("") # return None xmins.append(xmin / row['width']) xmaxs.append(xmax / row['width']) ymins.append(ymin / row['height']) ymaxs.append(ymax / row['height']) classes_text.append(class_int_to_text(row['class'])) classes.append(class_int_to_int(row['class'])) if len(xmins) == 0: return None tf_example_detection = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(row['height']), 'image/width': dataset_util.int64_feature(row['width']), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example_detection
def _create_tf_example(input: pd.Series) -> tf.train.Example: # outputs one tf record per call. Therefore, to convert all relevant images to tf records we need to loop and # call this method on each EXAMPLE """example: the input that holds all necessary info to convert it to a tf record. Not actual JPG file but some object that holds all the info about that jpg INPUT: is a pandas series :returns a tf.train.Example which i think is then transformed into a string and that string is used as input to be written by a TFRecord writer into a file, and that file is in TFRecord format""" height = input[csv_column_names[2]] # Image height width = input[csv_column_names[1]] # Image width # NOTE: needed to encode the below string - the example in the github just leaves in str format which will give # you a type error in the below Example() initialization filename = str.encode( input[csv_column_names[0]] ) # Filename of the image. Empty if image is not from file. Note not the whole path. only the filename # NOTE: gfile is mostly just a wrappe for Python's filesystem with open API. But can handle opening files # that are not local (like on google storage and HDFS as well) # see: https://stackoverflow.com/questions/42256938/what-does-tf-gfile-do-in-tensorflow?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa with tf.gfile.GFile(input[path_column], 'rb') as fid: # this jpg is in bytes format which is used when creating an Example object encoded_jpg = fid.read() # NOTE: in contrast with the raccoon dataset github don't open image to get width and height because we already have that # info when converting to xml so I think that is duplicative. Not sure why he does it image_format = b'jpg' # b'jpeg' or b'png' - assume jpg files # below lists just have one element in them. 
see example: # https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md xmins = [ input[csv_column_names[4]] / width ] # List of normalized left x coordinates in bounding box (1 per box) xmaxs = [input[csv_column_names[6]] / width ] # List of normalized right x coordinates in bounding box # (1 per box) ymins = [ input[csv_column_names[5]] / height ] # List of normalized top y coordinates in bounding box (1 per box) ymaxs = [input[csv_column_names[7]] / height ] # List of normalized bottom y coordinates in bounding box # (1 per box) # NOTE: needed to encode the below string - the example in the github just leaves in str format which will give # you a type error in the below Example() initialization classes_text = [str.encode(input[csv_column_names[3]]) ] # List of string class name of bounding box (1 per box) classes = [label_map[input[csv_column_names[3]]] ] # List of integer class id of bounding box (1 per box) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
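# --- Usage sketch (not part of the original function) ---
# _create_tf_example() above consumes one pandas Series per bounding box, so a
# whole CSV can be converted with a plain row loop. The CSV and output paths
# are placeholders; csv_column_names, path_column and label_map are the
# module-level names the function already relies on.
import pandas as pd

examples_df = pd.read_csv('annotations/train_labels.csv')
with tf.python_io.TFRecordWriter('train.record') as writer:
  for _, row in examples_df.iterrows():  # one row == one box == one Example
    writer.write(_create_tf_example(row).SerializeToString())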
def _create_tf_example(row, img_input):
    if _is_benign(row):
        folder_name = 'benign'
    elif _is_cancer(row):
        folder_name = 'cancer'
    else:
        raise InvalidFileNameError("Invalid Filename")
    full_path = os.path.join(img_input, folder_name, '{}'.format(row['filename']))
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = row['filename'].encode('utf8')
    channels = row['channels']
    shape = [int(height), int(width), int(channels)]
    image_format = b'jpg'
    xmins = [row['xmin'] / width]
    xmaxs = [row['xmax'] / width]
    ymins = [row['ymin'] / height]
    ymaxs = [row['ymax'] / height]
    classes_text = [row['class'].encode('utf8')]
    classes = [_class_text_to_int(row['class'])]
    difficult = [0]
    truncated = [0]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/channels': dataset_util.int64_feature(channels),
        'image/shape': dataset_util.int64_list_feature(shape),
        'image/class': dataset_util.int64_list_feature(classes),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/bbox/label': dataset_util.int64_list_feature(classes),
        'image/object/bbox/label_text': dataset_util.bytes_list_feature(classes_text),
        'image/object/bbox/difficult': dataset_util.int64_list_feature(difficult),
        'image/object/bbox/truncated': dataset_util.int64_list_feature(truncated),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
    }))
    return tf_example
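# The helpers referenced above (_is_benign, _is_cancer, _class_text_to_int and
# InvalidFileNameError) are not shown in this snippet; the sketches below are
# hypothetical stand-ins that assume the class label is stored in row['class'].
class InvalidFileNameError(Exception):
    """Raised when a row cannot be mapped to a known image folder."""


def _is_benign(row):
    return row['class'] == 'benign'   # assumption: the 'class' column holds the label


def _is_cancer(row):
    return row['class'] == 'cancer'


def _class_text_to_int(class_text):
    # assumed id assignment; the real label map may differ
    return {'benign': 1, 'cancer': 2}.get(class_text)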
def create_tf_example(example):
    height = 660  # Image height
    width = 512   # Image width
    filename = example[0]['image']['original_filename']  # Filename of the image. Empty if image is not from file
    filename = filename.encode()
    with tf.gfile.GFile(example[0]['image']['original_filename'], 'rb') as fid:
        encoded_image = fid.read()
    image_format = 'jpg'.encode()

    xmins = []         # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []         # List of normalized right x coordinates in bounding box
    ymins = []         # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []         # List of normalized bottom y coordinates in bounding box
    classes_text = []  # List of string class names of bounding box (1 per box)
    classes = []       # List of integer class ids of bounding box (1 per box)
    for box in example[1]['boxes']:
        # Cast to float before dividing so integer pixel coordinates cannot trigger
        # integer division.
        xmins.append(float(box['x_min']) / width)
        xmaxs.append(float(box['x_max']) / width)
        ymins.append(float(box['y_min']) / height)
        ymaxs.append(float(box['y_max']) / height)
        classes_text.append(box['label_name'].encode())
        classes.append(int(LABEL_DICT[box['label_id']]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
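# Hypothetical shape of the `example` argument expected by the function above: a pair
# whose first element describes the image and whose second element lists its boxes in
# absolute pixel coordinates. The file path is invented, and LABEL_DICT is assumed to
# be a module-level map from label_id strings to integer class ids.
def _fixed_size_example_sketch():
    example = (
        {'image': {'original_filename': 'images/scan_0001.jpg'}},
        {'boxes': [{'x_min': 30.0, 'x_max': 210.0, 'y_min': 50.0, 'y_max': 400.0,
                    'label_name': 'defect', 'label_id': '1'}]},
    )
    return create_tf_example(example)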
def prepare_example(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

    Args:
      image_path: The complete path to image.
      annotations: A dictionary representing the annotation of a single object
        that appears in the image.
      label_map_dict: A map from string label names to integer ids.

    Returns:
      example: The converted tf.Example.
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    image = np.asarray(image)

    key = hashlib.sha256(encoded_png).hexdigest()

    width = int(image.shape[1])
    height = int(image.shape[0])

    xmin_norm = annotations['2d_bbox_left'] / float(width)
    ymin_norm = annotations['2d_bbox_top'] / float(height)
    xmax_norm = annotations['2d_bbox_right'] / float(width)
    ymax_norm = annotations['2d_bbox_bottom'] / float(height)

    difficult_obj = [0] * len(xmin_norm)

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [x.encode('utf8') for x in annotations['type']]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [label_map_dict[x] for x in annotations['type']]),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.float_list_feature(
            annotations['truncated']),
        'image/object/alpha': dataset_util.float_list_feature(
            annotations['alpha']),
        'image/object/3d_bbox/height': dataset_util.float_list_feature(
            annotations['3d_bbox_height']),
        'image/object/3d_bbox/width': dataset_util.float_list_feature(
            annotations['3d_bbox_width']),
        'image/object/3d_bbox/length': dataset_util.float_list_feature(
            annotations['3d_bbox_length']),
        'image/object/3d_bbox/x': dataset_util.float_list_feature(
            annotations['3d_bbox_x']),
        'image/object/3d_bbox/y': dataset_util.float_list_feature(
            annotations['3d_bbox_y']),
        'image/object/3d_bbox/z': dataset_util.float_list_feature(
            annotations['3d_bbox_z']),
        'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
            annotations['3d_bbox_rot_y']),
    }))
    return example
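# A hedged usage sketch for prepare_example: the annotations dict is assumed to hold
# KITTI-style per-object arrays (one entry per object), and the image path and label
# map are invented for illustration.
def _prepare_kitti_example_sketch():
    annotations = {
        '2d_bbox_left': np.array([100.0]),
        '2d_bbox_top': np.array([120.0]),
        '2d_bbox_right': np.array([300.0]),
        '2d_bbox_bottom': np.array([240.0]),
        'type': ['car'],
        'truncated': np.array([0.0]),
        'alpha': np.array([0.0]),
        '3d_bbox_height': np.array([1.5]),
        '3d_bbox_width': np.array([1.6]),
        '3d_bbox_length': np.array([3.9]),
        '3d_bbox_x': np.array([1.0]),
        '3d_bbox_y': np.array([1.5]),
        '3d_bbox_z': np.array([20.0]),
        '3d_bbox_rot_y': np.array([0.0]),
    }
    label_map_dict = {'car': 1}
    return prepare_example('training/image_2/000000.png', annotations, label_map_dict)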
def create_tf_example(image_path, image, annotations_list, category_index, include_masks=False):
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for idx, object_annotations in enumerate(annotations_list):
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Skip degenerate boxes and boxes that fall outside the image.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            segm = object_annotations['segmentation']
            if isinstance(segm, list):
                # Polygon segmentation: convert to RLE and merge into a single mask.
                rles = mask.frPyObjects(segm, image_height, image_width)
                rle = mask.merge(rles)
                m = mask.decode(rle)
            else:
                # Segmentation is already RLE encoded.
                m = mask.decode(segm)
            pil_image = PIL.Image.fromarray(m)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
            if DUMP_MASK_IMAGES:
                # Also write each binary mask to disk for inspection.
                m[m > 0] = 255
                pil_image = PIL.Image.fromarray(m)
                save_path = FLAGS.output_dir + '/' + filename.split('.')[0] + '_mask_' + str(idx) + '.png'
                pil_image.save(save_path)

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label': dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
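# A hypothetical COCO-style call of the function above: `image` and `annotations_list`
# follow the COCO annotation format (bbox given as [x, y, width, height] in pixels),
# and the image path and category name are invented for illustration.
def _coco_example_sketch():
    image = {'height': 480, 'width': 640, 'file_name': '000000000001.jpg', 'id': 1}
    annotations_list = [
        {'bbox': [10, 20, 100, 150], 'category_id': 1, 'iscrowd': 0, 'area': 15000.0},
    ]
    category_index = {1: {'id': 1, 'name': 'person'}}
    return create_tf_example('train2017/000000000001.jpg', image,
                             annotations_list, category_index, include_masks=False)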
def create_tf_record(output_filename, num_shards, examples):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, example in enumerate(examples):
            img_path = os.path.join(read_bucket, example)
            if not os.path.isfile(img_path):
                continue
            with tf.gfile.GFile(img_path, 'rb') as fid:
                encoded_jpg = fid.read()
            encoded_jpg_io = io.BytesIO(encoded_jpg)
            image = PIL.Image.open(encoded_jpg_io)
            if image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            key = hashlib.sha256(encoded_jpg).hexdigest()

            width, height = image.size

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []  # 'coke', 'pepsi', 'coke'...
            classes = []       # 1, 2, 1...
            difficult_obj = []
            truncated = []
            poses = []

            for annotation in annotations[example]:
                xmins.append(annotation['x'])
                xmaxs.append(annotation['x2'])
                ymins.append(annotation['y'])
                ymaxs.append(annotation['y2'])
                classes_text.append(annotation['label'].encode('utf8'))
                classes.append(1)  # temporary, I need to assign labels to actual ids
                difficult_obj.append(0)
                truncated.append(0)
                poses.append(''.encode('utf8'))

            try:
                feature_dict = {
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/source_id': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label': dataset_util.int64_list_feature(classes),
                    'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                    'image/object/truncated': dataset_util.int64_list_feature(truncated),
                    'image/object/view': dataset_util.bytes_list_feature(poses),
                }
                tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                print('Invalid example, ignoring.')
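# A minimal sketch of driving create_tf_record; it assumes the module-level
# `annotations` dict (filename -> list of box dicts) and `read_bucket` directory used
# above are already populated. The sharded writer names its outputs
# <output_filename>-00000-of-000NN, which the input config can match with a glob.
def _write_sharded_records_sketch():
    examples = list(annotations.keys())  # one entry per image filename
    create_tf_record('train.record', num_shards=10, examples=examples)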
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map, encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

    Args:
      annotations_data_frame: Data frame containing the annotations for a single
        image.
      label_map: String to integer label map.
      encoded_image: The encoded image string.

    Returns:
      The populated TF Example, if the label of at least one object is present in
      label_map. Otherwise, returns None.
    """
    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]
    image_id = annotations_data_frame.ImageID.iloc[0]
    feature_map = {
        standard_fields.TfExampleFields.object_bbox_ymin:
            dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_xmin:
            dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_ymax:
            dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_xmax:
            dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
        standard_fields.TfExampleFields.object_class_text:
            dataset_util.bytes_list_feature(
                filtered_data_frame.LabelName.as_matrix()),
        standard_fields.TfExampleFields.object_class_label:
            dataset_util.int64_list_feature(
                filtered_data_frame.LabelName.map(lambda x: label_map[x])
                .as_matrix()),
        standard_fields.TfExampleFields.filename:
            dataset_util.bytes_feature('{}.jpg'.format(image_id)),
        standard_fields.TfExampleFields.source_id:
            dataset_util.bytes_feature(image_id),
        standard_fields.TfExampleFields.image_encoded:
            dataset_util.bytes_feature(encoded_image),
    }

    if 'IsGroupOf' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_group_of] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsGroupOf.as_matrix().astype(int))
    if 'IsOccluded' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_occluded] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsOccluded.as_matrix().astype(int))
    if 'IsTruncated' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_truncated] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsTruncated.as_matrix().astype(int))
    if 'IsDepiction' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_depiction] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsDepiction.as_matrix().astype(int))
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
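# A hedged sketch of using the function above with an Open Images style bounding-box
# CSV: annotations are grouped per ImageID, the matching JPEG is read, and the
# serialized Example is written out. File paths and the label_map entry are assumptions.
def _write_open_images_records_sketch():
    all_annotations = pd.read_csv('train-annotations-bbox.csv')
    label_map = {'/m/01g317': 1}  # example class entry
    with tf.python_io.TFRecordWriter('open_images_train.record') as writer:
        for image_id, image_annotations in all_annotations.groupby('ImageID'):
            with tf.gfile.GFile('images/{}.jpg'.format(image_id), 'rb') as fid:
                encoded_image = fid.read()
            tf_example = tf_example_from_annotations_data_frame(
                image_annotations, label_map, encoded_image)
            if tf_example:
                writer.write(tf_example.SerializeToString())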
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      label_map_dict: A map from string label names to integer ids.
      image_subdirectory: String specifying subdirectory within the Pascal
        dataset directory holding the actual image data.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Mask handling kept from the original script for reference, currently disabled:
    # with tf.gfile.GFile(mask_path, 'rb') as fid:
    #     encoded_mask_png = fid.read()
    # encoded_png_io = io.BytesIO(encoded_mask_png)
    # mask = PIL.Image.open(encoded_png_io)
    # if mask.format != 'PNG':
    #     raise ValueError('Mask format not PNG')
    # mask_np = np.asarray(mask)
    # nonbackground_indices_x = np.any(mask_np != 2, axis=0)
    # nonbackground_indices_y = np.any(mask_np != 2, axis=1)
    # nonzero_x_indices = np.where(nonbackground_indices_x)
    # nonzero_y_indices = np.where(nonbackground_indices_y)

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # masks = []
    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue
        difficult_obj.append(int(difficult))

        # if faces_only:
        #     xmin = float(obj['bndbox']['xmin'])
        #     xmax = float(obj['bndbox']['xmax'])
        #     ymin = float(obj['bndbox']['ymin'])
        #     ymax = float(obj['bndbox']['ymax'])
        # else:
        #     xmin = float(np.min(nonzero_x_indices))
        #     xmax = float(np.max(nonzero_x_indices))
        #     ymin = float(np.min(nonzero_y_indices))
        #     ymax = float(np.max(nonzero_y_indices))
        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])

        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)
        # class_name = get_class_name_from_filename(data['filename'])
        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))
        # if not faces_only:
        #     mask_remapped = mask_np != 2
        #     masks.append(mask_remapped)

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }
    # if not faces_only:
    #     mask_stack = np.stack(masks).astype(np.float32)
    #     masks_flattened = np.reshape(mask_stack, [-1])
    #     feature_dict['image/object/mask'] = (
    #         dataset_util.float_list_feature(masks_flattened.tolist()))
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
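# A minimal sketch of feeding dict_to_tf_example from a PASCAL VOC style XML file,
# using the recursive_parse_xml_to_dict helper the docstring refers to; the XML path,
# image subdirectory and label map entries are assumptions.
def _voc_example_sketch():
    from lxml import etree
    with tf.gfile.GFile('annotations/xmls/image_0001.xml', 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    label_map_dict = {'dog': 1, 'cat': 2}
    return dict_to_tf_example(data, label_map_dict, 'images')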
def create_tf_example(group, path):
    try:
        with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
            encoded_jpg = fid.read()
        encoded_jpg_io = io.BytesIO(encoded_jpg)
    except tf.errors.NotFoundError:
        print("no existing file:", os.path.join(path, '{}'.format(group.filename)))
        return
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmin = row['xmin'] / width
        xmax = row['xmax'] / width
        ymin = row['ymin'] / height
        ymax = row['ymax'] / height
        # Ensure min <= max. (The original used np.min/np.max with two positional
        # arguments, which treats the second argument as an axis, not a value.)
        xmin, xmax = min(xmin, xmax), max(xmin, xmax)
        ymin, ymax = min(ymin, ymax), max(ymin, ymax)
        # Clamp the normalized coordinates to [0.0, 1.0].
        xmin = min(max(xmin, 0.0), 1.0)
        xmax = min(max(xmax, 0.0), 1.0)
        ymin = min(max(ymin, 0.0), 1.0)
        ymax = min(max(ymax, 0.0), 1.0)
        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)  # was ymaxs.append(ymaxs), which appended the list itself
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
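# The `group` argument of the function above is assumed to come from the common
# pattern of grouping a labels DataFrame by filename into namedtuples with
# `.filename` and `.object` attributes; a hedged sketch of that helper and a driver
# (CSV and output paths are assumptions):
from collections import namedtuple


def _split_by_filename(df):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby('filename')
    return [data(filename, gb.get_group(filename)) for filename in gb.groups]


def _write_grouped_records_sketch(csv_path='train_labels.csv', image_dir='images/'):
    grouped = _split_by_filename(pd.read_csv(csv_path))
    with tf.python_io.TFRecordWriter('train.record') as writer:
        for group in grouped:
            tf_example = create_tf_example(group, image_dir)
            if tf_example:  # create_tf_example returns None when the image file is missing
                writer.write(tf_example.SerializeToString())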