def create_tf_example(row):
    """Build a ``tf.train.Example`` for a single annotated box.

    The image named by ``row['filename']`` is read from the ``images``
    directory under the current working directory. The box coordinates in
    ``row`` are divided by the image width/height reported by PIL.

    Args:
        row: Record indexable by ``'filename'``, ``'class'``, ``'xmin'``,
            ``'xmax'``, ``'ymin'`` and ``'ymax'``.

    Returns:
        The populated ``tf.train.Example``.
    """
    image_path = os.path.join(
        os.getcwd(), 'images', '{}'.format(row['filename']))
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        jpg_bytes = image_file.read()

    # Open the raw bytes with PIL to obtain the pixel dimensions.
    width, height = Image.open(io.BytesIO(jpg_bytes)).size
    name_bytes = row['filename'].encode('utf8')
    format_bytes = b'jpg'

    # One box per example, so every per-object feature is a 1-element list.
    left = [row['xmin'] / width]
    right = [row['xmax'] / width]
    top = [row['ymin'] / height]
    bottom = [row['ymax'] / height]
    label_names = [row['class'].encode('utf8')]
    label_ids = [class_text_to_int(row['class'])]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(jpg_bytes),
        'image/format': dataset_util.bytes_feature(format_bytes),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def testDecodeDefaultGroundtruthWeights(self):
    """Decoding two boxes with no weight feature yields weights of all ones."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)

    # Two boxes, stored column-wise as the decoder expects.
    ymin_values = [0.0, 4.0]
    xmin_values = [1.0, 5.0]
    ymax_values = [2.0, 6.0]
    xmax_values = [3.0, 7.0]

    feature = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin_values),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin_values),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax_values),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax_values),
    }
    proto = tf.train.Example(features=tf.train.Features(feature=feature))
    serialized = proto.SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    boxes_key = fields.InputDataFields.groundtruth_boxes
    static_box_shape = decoded[boxes_key].get_shape().as_list()
    self.assertAllEqual(static_box_shape, [None, 4])

    with self.test_session() as sess:
        decoded = sess.run(decoded)

    self.assertAllClose(
        decoded[fields.InputDataFields.groundtruth_weights],
        np.ones(2, dtype=np.float32))
def create_tf_example(example, writer):
    """Serialize one pre-parsed example dict to ``writer``.

    Args:
        example: Mapping with keys ``height``, ``width``, ``filename``,
            ``encoded_image_data``, ``image_format``, ``bbox``,
            ``class_text`` and ``class_idx``. Each entry of ``bbox`` is
            indexed as ``[0]=xmin, [1]=ymin, [2]=xmax, [3]=ymax`` and is
            divided by the image width/height.
        writer: Object with a ``write(bytes)`` method that receives the
            serialized ``tf.train.Example``.
    """
    height = example['height']
    width = example['width']
    filename = example['filename']
    image_bytes = example['encoded_image_data']
    image_format = example['image_format']
    boxes = example['bbox']

    def _scaled(coord_index, extent):
        # One normalized coordinate per box, in box order.
        return [box[coord_index] / float(extent) for box in boxes]

    left = _scaled(0, width)
    right = _scaled(2, width)
    top = _scaled(1, height)
    bottom = _scaled(3, height)

    label_names = example['class_text']
    label_ids = example['class_idx']

    filename_bytes_a = filename.encode("utf-8")
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename_bytes_a),
        'image/source_id':
            dataset_util.bytes_feature(filename.encode("utf-8")),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        'image/format':
            dataset_util.bytes_feature(image_format.encode("utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [text.encode("utf-8") for text in label_names]),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    record = tf.train.Example(features=tf.train.Features(feature=feature))
    writer.write(record.SerializeToString())
def createTfExample(singleFileData, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        singleFileData: Object with a ``filename`` attribute and an
            ``object`` attribute exposing ``iterrows()``; each row provides
            ``xmin``, ``xmax``, ``ymin``, ``ymax`` and ``class``.
        path: Directory that contains the image file.

    Returns:
        The populated ``tf.train.Example``.
    """
    imagePath = os.path.join(path, '{}'.format(singleFileData.filename))
    with tf.gfile.GFile(imagePath, 'rb') as imageFile:
        encodedImage = imageFile.read()

    # Open the raw bytes with PIL to obtain the pixel dimensions.
    width, height = Image.open(io.BytesIO(encodedImage)).size

    fileName = singleFileData.filename.encode('utf8')
    imageFormat = b'jpg'

    # Each row corresponds to one box of this image.
    boxRows = [row for _, row in singleFileData.object.iterrows()]
    xMins = [row['xmin'] / width for row in boxRows]
    xMaxs = [row['xmax'] / width for row in boxRows]
    yMins = [row['ymin'] / height for row in boxRows]
    yMaxs = [row['ymax'] / height for row in boxRows]
    classesAsText = [row['class'].encode('utf8') for row in boxRows]
    classesAsInts = [classAsTextToClassAsInt(row['class'])
                     for row in boxRows]

    featureMap = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fileName),
        'image/source_id': dataset_util.bytes_feature(fileName),
        'image/encoded': dataset_util.bytes_feature(encodedImage),
        'image/format': dataset_util.bytes_feature(imageFormat),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xMins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xMaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(yMins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(yMaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classesAsText),
        'image/object/class/label':
            dataset_util.int64_list_feature(classesAsInts),
    }
    return tf.train.Example(features=tf.train.Features(feature=featureMap))
def testDecodeObjectWeight(self):
    """Explicit per-object weights round-trip through the decoder."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    expected_weights = [0.75, 1.0]

    feature = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/object/weight':
            dataset_util.float_list_feature(expected_weights),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    weights_key = fields.InputDataFields.groundtruth_weights
    # The static shape has an unknown number of objects.
    self.assertAllEqual(decoded[weights_key].get_shape().as_list(), [None])

    with self.test_session() as sess:
        decoded = sess.run(decoded)

    self.assertAllEqual(expected_weights, decoded[weights_key])
def create_tf_example(filename, writer):
    """Convert one annotation file into a ``tf.train.Example`` and write it.

    The first line of the annotation file names the image: the line is
    stripped and its first character is dropped to obtain the path. Every
    following non-blank line is comma-separated, with the first four fields
    parsed as box coordinates (``xmin, ymin, xmax, ymax``, truncated to
    integers) and the fifth as the class name, which is looked up in the
    module-level ``labels`` sequence. The image is re-encoded as PNG.

    Args:
        filename: Path of the annotation text file.
        writer: Object with a ``write(bytes)`` method that receives the
            serialized example.
    """
    # Close the annotation file deterministically instead of leaking the
    # handle returned by a bare open().
    with open(filename) as annotation_file:
        lines = annotation_file.readlines()
    image_filename = lines[0].strip()[1:]

    classes_text = []
    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []

    # The context manager releases the underlying image file once the PNG
    # bytes and dimensions have been extracted.
    with Image.open(image_filename) as im:
        arr = io.BytesIO()
        im.save(arr, format='PNG')
        height = im.height
        width = im.width
    encoded_image_data = arr.getvalue()
    image_format = 'png'

    for line in lines[1:]:
        line = line.strip()
        if line == '':
            continue
        data = line.split(",")
        # Coordinates may be written as floats; truncate them to ints.
        bbox = list(map(int, map(float, data[:4])))
        class_text = data[4].strip()
        class_idx = labels.index(class_text)
        classes_text.append(class_text)
        classes.append(class_idx)
        xmins.append(bbox[0] / float(width))
        xmaxs.append(bbox[2] / float(width))
        ymins.append(bbox[1] / float(height))
        ymaxs.append(bbox[3] / float(height))

    # NOTE(review): 'image/filename' and 'image/source_id' record the
    # annotation file path, not image_filename -- confirm this is intended.
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id':
            dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format':
            dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
def testDecodeInstanceSegmentation(self):
    """Instance masks and class labels decode when masks are enabled."""
    num_instances = 4
    image_height = 5
    image_width = 3

    # Random image.
    pixels = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)

    # Random binary masks, one per instance, flattened for storage.
    mask_shape = (num_instances, image_height, image_width)
    masks = np.random.randint(2, size=mask_shape).astype(np.float32)
    flat_masks = np.reshape(masks, [-1])

    # Random class label per instance.
    class_ids = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    feature = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/object/mask': dataset_util.float_list_feature(flat_masks),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(load_instance_masks=True)
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    masks_key = fields.InputDataFields.groundtruth_instance_masks
    classes_key = fields.InputDataFields.groundtruth_classes
    self.assertAllEqual(
        decoded[masks_key].get_shape().as_list(), [4, 5, 3])
    self.assertAllEqual(decoded[classes_key].get_shape().as_list(), [4])

    with self.test_session() as sess:
        decoded = sess.run(decoded)

    self.assertAllEqual(masks.astype(np.float32), decoded[masks_key])
    self.assertAllEqual(class_ids, decoded[classes_key])
def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and every box in ``group``.

    Args:
        group: Object with a ``filename`` attribute and an ``object``
            attribute exposing ``iterrows()``; each row provides ``xmin``,
            ``xmax``, ``ymin``, ``ymax`` and ``class``.
        path: Directory that contains the image file.

    Returns:
        The populated ``tf.train.Example``.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        jpg_bytes = image_file.read()

    # Open the raw bytes with PIL to obtain the pixel dimensions.
    width, height = Image.open(io.BytesIO(jpg_bytes)).size

    name_bytes = group.filename.encode('utf8')
    format_bytes = b'jpg'

    left, right, top, bottom = [], [], [], []
    label_names, label_ids = [], []
    for _, box in group.object.iterrows():
        left.append(box['xmin'] / width)
        right.append(box['xmax'] / width)
        top.append(box['ymin'] / height)
        bottom.append(box['ymax'] / height)
        label_names.append(box['class'].encode('utf8'))
        label_ids.append(class_text_to_int(box['class']))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(jpg_bytes),
        'image/format': dataset_util.bytes_feature(format_bytes),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def testDecodeMultiClassScores(self):
    """Flattened multiclass scores round-trip when loading is enabled."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)

    ymin_values = [0.0, 4.0]
    xmin_values = [1.0, 5.0]
    ymax_values = [2.0, 6.0]
    xmax_values = [3.0, 7.0]
    # Two scores for each of the two boxes, stored as one flat list.
    flat_scores = [100., 50.] + [20., 30.]

    feature = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/object/class/multiclass_scores':
            dataset_util.float_list_feature(flat_scores),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin_values),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin_values),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax_values),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax_values),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(
        load_multiclass_scores=True)
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
        decoded = sess.run(decoded)

    self.assertAllEqual(
        flat_scores, decoded[fields.InputDataFields.multiclass_scores])
def create_tf_record(self):
    """Write a one-example TFRecord into the test temp dir and return its path.

    The example holds a random 4x5 JPEG, a single box covering the whole
    image with class label 2, and an all-ones flat mask of 4 * 5 values.
    """
    record_path = os.path.join(self.get_temp_dir(), 'tfrecord')
    record_writer = tf.python_io.TFRecordWriter(record_path)

    pixels = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    ones_mask = (4 * 5) * [1.0]
    with self.test_session():
        jpeg_bytes = tf.image.encode_jpeg(tf.constant(pixels)).eval()

    feature = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/height': dataset_util.int64_feature(4),
        'image/width': dataset_util.int64_feature(5),
        'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
        'image/object/class/label': dataset_util.int64_list_feature([2]),
        'image/object/mask': dataset_util.float_list_feature(ones_mask),
    }
    proto = tf.train.Example(features=tf.train.Features(feature=feature))
    record_writer.write(proto.SerializeToString())
    record_writer.close()

    return record_path
def create_mock_tfrecord():
    """Write a one-example TFRecord to ``get_mock_tfrecord_path()``.

    The example contains a 1x1 RGB PNG (pixel ``[123, 0, 0]``) under the
    standard encoded-image key and a float list ``[1, 2, 3, 4]`` under
    ``'test_field'``.
    """
    # Function-scope import so this block does not depend on a top-level
    # ``import io`` being present in the file.
    import io

    pil_image = Image.fromarray(
        np.array([[[123, 0, 0]]], dtype=np.uint8), 'RGB')
    # PNG data is binary: a bytes buffer works on Python 2 and 3, whereas
    # the Python-2-only StringIO module does not exist on Python 3.
    image_output_stream = io.BytesIO()
    pil_image.save(image_output_stream, format='png')
    encoded_image = image_output_stream.getvalue()

    feature_map = {
        'test_field':
            dataset_util.float_list_feature([1, 2, 3, 4]),
        standard_fields.TfExampleFields.image_encoded:
            dataset_util.bytes_feature(encoded_image),
    }

    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature_map))
    with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
        writer.write(tf_example.SerializeToString())
def testDecodeKeypoint(self):
    """Boxes and 3-per-box keypoints decode to the expected shapes/values."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)

    ymin_values = [0.0, 4.0]
    xmin_values = [1.0, 5.0]
    ymax_values = [2.0, 6.0]
    xmax_values = [3.0, 7.0]
    # Six keypoints in total: three for each of the two boxes.
    keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
    keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

    feature = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin_values),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin_values),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax_values),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax_values),
        'image/object/keypoint/y':
            dataset_util.float_list_feature(keypoint_ys),
        'image/object/keypoint/x':
            dataset_util.float_list_feature(keypoint_xs),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    boxes_key = fields.InputDataFields.groundtruth_boxes
    keypoints_key = fields.InputDataFields.groundtruth_keypoints
    self.assertAllEqual(
        decoded[boxes_key].get_shape().as_list(), [None, 4])
    self.assertAllEqual(
        decoded[keypoints_key].get_shape().as_list(), [2, 3, 2])

    with self.test_session() as sess:
        decoded = sess.run(decoded)

    # Rows are boxes; columns are ymin, xmin, ymax, xmax.
    box_columns = [ymin_values, xmin_values, ymax_values, xmax_values]
    expected_boxes = np.vstack(box_columns).transpose()
    self.assertAllEqual(expected_boxes, decoded[boxes_key])
    self.assertAllEqual(
        2, decoded[fields.InputDataFields.num_groundtruth_boxes])

    stacked_keypoints = np.vstack([keypoint_ys, keypoint_xs]).transpose()
    expected_keypoints = stacked_keypoints.reshape((2, 3, 2))
    self.assertAllEqual(expected_keypoints, decoded[keypoints_key])
def testInstancesNotAvailableByDefault(self):
    """A default decoder does not emit instance masks even if present."""
    num_instances = 4
    image_height = 5
    image_width = 3

    # Random image.
    pixels = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)

    # Random binary masks, one per instance, flattened for storage.
    mask_shape = (num_instances, image_height, image_width)
    masks = np.random.randint(2, size=mask_shape).astype(np.float32)
    flat_masks = np.reshape(masks, [-1])

    # Random class label per instance.
    class_ids = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    feature = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/object/mask': dataset_util.float_list_feature(flat_masks),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature)).SerializeToString()

    # No load_instance_masks argument: the decoder runs with its defaults.
    decoder = tf_example_decoder.TfExampleDecoder()
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    self.assertTrue(
        fields.InputDataFields.groundtruth_instance_masks not in decoded)
def dict_to_tf_example(split_data_dir,
                       name,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert one image of the Munich dataset to a tf.Example proto.

    Reads ``<name>.jpg`` and the ground-truth file ``<name>_gt.txt`` from
    ``split_data_dir``. Each ground-truth line is parsed as comma-separated
    numbers: the first four are used as xmin, ymin, xmax, ymax and divided
    by the image width/height, and the last is used as an index into the
    module-level ``OBJ_NAME`` and ``USE_LABEL`` tables.

    Args:
      split_data_dir: Directory holding the image and ground-truth files.
      name: Base name of the image, without extension.
      ignore_difficult_instances: Not used by this function.
      image_subdirectory: Not used by this function.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if PIL reports an image format other than JPEG.
    """
    image_suffix = '.jpg'
    ground_truth_suffix = '_gt.txt'

    file_name = name + image_suffix
    img_path = os.path.join(split_data_dir, file_name)
    with tf.gfile.GFile(img_path, 'rb') as image_file:
        encoded_jpg = image_file.read()

    pil_image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if pil_image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Image dimensions are taken from OpenCV's decoded array.
    height, width, _ = cv2.imread(img_path).shape

    xmin, ymin, xmax, ymax = [], [], [], []
    classes, classes_text = [], []
    truncated, poses, difficult_obj = [], [], []

    gt_filename = os.path.join(split_data_dir, name + ground_truth_suffix)
    with open(gt_filename) as gt_file:
        gt_lines = gt_file.read().splitlines()

    for gt_line in gt_lines:
        difficult_obj.append(0)
        values = [float(token) for token in gt_line.split(',')]
        label = int(values[-1])

        xmin.append(values[0] / width)
        ymin.append(values[1] / height)
        xmax.append(values[2] / width)
        ymax.append(values[3] / height)

        classes_text.append(OBJ_NAME[label].encode('utf8'))
        classes.append(USE_LABEL[label])

        # Truncation and pose are filled with placeholder values.
        truncated.append(0)
        poses.append('none'.encode('utf8'))

    name_bytes_for_filename = file_name.encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(name_bytes_for_filename),
        'image/source_id':
            dataset_util.bytes_feature(file_name.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def dict_to_tf_example(data,
                       annotations_dir,
                       images_dir,
                       label_map_dict,
                       include_masks,
                       ignore_difficult_instances):
    """Convert one parsed annotation dict into a tf.Example proto.

    Args:
      data: Mapping with ``filename``, ``size`` (holding ``width`` and
        ``height``) and optionally ``object``, a list of per-object
        mappings with ``difficult``, ``bndbox``, ``name``, ``truncated``
        and ``pose``.
      annotations_dir: Directory whose ``masks`` subdirectory holds one PNG
        per object, named ``<image stem>_<object index>.png``.
      images_dir: Directory containing the image file.
      label_map_dict: Passed to ``getClassId`` together with the object name.
      include_masks: Whether to load the mask PNGs and add them, re-encoded
        as PNG, under ``image/object/mask``.
      ignore_difficult_instances: Whether objects flagged difficult are
        skipped.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image is not JPEG or a mask is not PNG, as reported
        by PIL.
    """
    image_path = os.path.join(images_dir, data['filename'])
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        encoded_jpg = image_file.read()
    pil_image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if pil_image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin, ymin, xmax, ymax = [], [], [], []
    classes, classes_text = [], []
    truncated, poses, difficult_obj = [], [], []
    masks = []

    annotated_objects = data['object'] if 'object' in data else []
    for idx, obj in enumerate(annotated_objects):
        is_difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and is_difficult:
            continue
        difficult_obj.append(int(is_difficult))

        box = obj['bndbox']
        xmin.append(float(box['xmin']) / width)
        ymin.append(float(box['ymin']) / height)
        xmax.append(float(box['xmax']) / width)
        ymax.append(float(box['ymax']) / height)

        class_id = getClassId(obj['name'], label_map_dict)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(class_id)
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

        if not include_masks:
            continue

        mask_name = (
            os.path.splitext(data['filename'])[0] + '_' + str(idx) + '.png')
        mask_path = os.path.join(annotations_dir, "masks", mask_name)
        with tf.gfile.GFile(mask_path, 'rb') as mask_file:
            encoded_mask_png = mask_file.read()
        mask_image = PIL.Image.open(io.BytesIO(encoded_mask_png))
        if mask_image.format != 'PNG':
            raise ValueError('Mask format not PNG')
        # Pixels equal to 255 become 1, everything else 0.
        mask_pixels = np.asarray(mask_image)
        masks.append((mask_pixels == 255).astype(np.uint8))

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }

    if include_masks:
        encoded_mask_png_list = []
        for binary_mask in masks:
            png_buffer = io.BytesIO()
            PIL.Image.fromarray(binary_mask).save(png_buffer, format='PNG')
            encoded_mask_png_list.append(png_buffer.getvalue())
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png_list))

    return tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list: list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
        u'bbox', u'category_id', u'id']
        Each 'bbox' is unpacked as (x, y, width, height) in absolute
        coordinates and converted here to xmin/xmax/ymin/ymax values
        divided by the image width/height.
      image_dir: directory containing the image files.
      category_index: a dict containing category information keyed by the
        category id; each entry's 'name' is read.
      include_masks: Whether to include instance segmentation masks
        (PNG encoded) in the result. default: False.

    Returns:
      key: hex SHA-256 digest of the encoded image bytes.
      example: The converted tf.Example.
      num_annotations_skipped: Number of annotations that were skipped
        because the box had non-positive width/height or extended past the
        image's right or bottom edge.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    with tf.gfile.GFile(os.path.join(image_dir, filename), 'rb') as fid:
        encoded_jpg = fid.read()
    # The opened image is not used again below; the call is kept so the
    # bytes are still handed to PIL exactly as before.
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin, xmax, ymin, ymax = [], [], [], []
    is_crowd = []
    # Category names are collected but not written to feature_dict.
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0

    for annotation in annotations_list:
        x, y, width, height = tuple(annotation['bbox'])
        if (width <= 0 or height <= 0
                or x + width > image_width
                or y + height > image_height):
            num_annotations_skipped += 1
            continue

        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(annotation['iscrowd'])

        category_id = int(annotation['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(annotation['area'])

        if not include_masks:
            continue

        run_len_encoding = mask.frPyObjects(
            annotation['segmentation'], image_height, image_width)
        binary_mask = mask.decode(run_len_encoding)
        if not annotation['iscrowd']:
            # Collapse the last axis of the decoded mask with a max.
            binary_mask = np.amax(binary_mask, axis=2)
        png_buffer = io.BytesIO()
        PIL.Image.fromarray(binary_mask).save(png_buffer, format='PNG')
        encoded_mask_png.append(png_buffer.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return key, example, num_annotations_skipped
def create_tf_example(example, cvat_name, saved_dict, img_dir):
    """Convert one annotated image element into a ``tf.train.Example``.

    Args:
        example: Element whose ``attrib`` holds ``height``, ``width`` and
            ``name`` and whose children are annotations. Each child has an
            ``attrib`` with ``label`` and either ``points`` (a polygon
            written as ``"x,y;x,y;..."``) or ``xtl``/``ytl``/``xbr``/``ybr``
            (a box).
        cvat_name: Name of the child attribute whose lower-cased text is
            used as the class name. When it is ``""`` the annotation's
            ``label`` is used instead.
        saved_dict: Mapping from class name to integer class id.
        img_dir: Directory containing the image file.

    Returns:
        The ``tf.train.Example``, or ``None`` when the image cannot be
        read, its extension is not ``.jpg``/``.jpeg``/``.png``, or no
        annotation carries an accepted label.
    """
    # Only annotations with one of these labels are converted. Built once
    # rather than on every loop iteration.
    list_of_labels = ["poster_posm_1"]

    height = int(example.attrib['height'])
    width = int(example.attrib['width'])
    image_path = os.path.join(img_dir, example.attrib['name'])
    _, ext = os.path.splitext(example.attrib['name'])
    filename = image_path.encode('utf8')

    try:
        with tf.io.gfile.GFile(filename, 'rb') as fid:
            encoded_jpg = fid.read()
    except Exception:
        # Best-effort conversion: record the failure and skip this image.
        print(filename, 'not found')
        with open('logs.txt', "a") as f:
            # Write the text path (the encoded bytes cannot be written to a
            # text-mode file on Python 3), one entry per line.
            f.write(image_path + '\n')
        return None
    key = hashlib.sha256(encoded_jpg).hexdigest()

    if ext.lower() in ['.jpg', '.jpeg']:
        image_format = 'jpeg'.encode('utf8')
    elif ext.lower() == '.png':
        image_format = 'png'.encode('utf8')
    else:
        print('File Format not supported, Skipping')
        return None

    xmins = []  # Normalized left x coordinate, one per box.
    xmaxs = []  # Normalized right x coordinate, one per box.
    ymins = []  # Normalized top y coordinate, one per box.
    ymaxs = []  # Normalized bottom y coordinate, one per box.
    classes_text = []  # Class name as bytes, one per box.
    classes = []  # Integer class id, one per box.

    # Counts annotations with an accepted label, whether or not they end up
    # being written.
    counter = 0
    for box in example:
        if box.attrib['label'] not in list_of_labels:
            continue
        counter += 1

        box_name = ''
        if cvat_name == "":
            box_name = box.attrib['label']
        else:
            for attr in box:
                if attr.attrib['name'] == cvat_name:
                    box_name = attr.text.lower()
        # Keep only characters contained in ``printable``.
        box_name = ''.join(filter(lambda x: x in printable, box_name))

        if 'points' in box.attrib:
            # Polygon: use its axis-aligned bounding rectangle.
            points = [p.split(",") for p in box.attrib['points'].split(";")]
            x_val = [float(r[0]) for r in points]
            y_val = [float(r[1]) for r in points]
            xmins.append(float(min(x_val)) / width)
            xmaxs.append(float(max(x_val)) / width)
            ymins.append(float(min(y_val)) / height)
            ymaxs.append(float(max(y_val)) / height)
            classes_text.append(box_name.encode('utf8'))
            classes.append(saved_dict[box_name])
        elif box_name in saved_dict.keys():
            xmin = float(box.attrib['xtl'])
            xmax = float(box.attrib['xbr'])
            ymin = float(box.attrib['ytl'])
            ymax = float(box.attrib['ybr'])

            # Boxes that leave the image, touch the zero edge, or are
            # degenerate are dropped silently.
            error = (
                xmin > width
                or xmin <= 0
                or xmax > width
                or ymin > height
                or ymin <= 0
                or ymax > height
                or xmin >= xmax
                or ymin >= ymax
            )
            if not error:
                xmins.append(xmin / width)
                xmaxs.append(xmax / width)
                ymins.append(ymin / height)
                ymaxs.append(ymax / height)
                classes_text.append(box_name.encode('utf8'))
                classes.append(saved_dict[box_name])

    if counter == 0:
        return None

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def dict_to_tf_example(labels_image, label_data, crop_size, params,
                       label_map_dict, image_dir, image_prefix):
    """Build a tf.Example from five stacked image layers and rotated-box labels.

    The layers `<image_prefix>_hits.png`, `_observations.png`,
    `_intensity.png`, `_zmin.png` and `_zmax.png` are read from `image_dir`
    as single-channel images, stacked along the last axis (in that order),
    cropped, and stored as raw array bytes. Note that 'image/encoded'
    therefore holds the raw buffer, not PNG data, even though 'image/format'
    is written as 'png'.

    Args:
        labels_image: One entry per object; `entry[0]` holds the x pixel
            coordinates and `entry[1]` the y pixel coordinates of the object
            (presumably its corner points -- confirm with the caller).
        label_data: Indexable by the same position as `labels_image`; each
            item exposes `ry` (rotation in radians), `w`, `l` and `type`.
        crop_size: `(width, height)` of the crop in pixels.
        params: Dict providing `params['batch_processor']['resolution']`,
            the divisor that converts `w`/`l` into pixels.
        label_map_dict: Maps class names (`type`) to integer ids.
        image_dir: Directory containing the layer images.
        image_prefix: Common file-name prefix of the five layer images.

    Returns:
        The populated tf.train.Example.

    Raises:
        IOError: If one of the five layer images cannot be read.
    """
    # Channel order of the stacked input follows this tuple.
    layer_suffixes = ('hits', 'observations', 'intensity', 'zmin', 'zmax')
    layers = []
    for suffix in layer_suffixes:
        layer_path = os.path.join(image_dir,
                                  image_prefix + '_' + suffix + '.png')
        layer = cv2.imread(layer_path, 0)  # flag 0: load as grayscale
        if layer is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: ' + layer_path)
        layers.append(layer)
    inputs_stacked = np.stack(layers, axis=-1)

    # Keep the last crop_size[1] rows and a horizontally centred band of
    # crop_size[0] columns.
    rows = inputs_stacked.shape[0]
    cols = inputs_stacked.shape[1]
    length_crop_diff = rows - crop_size[1]
    width_crop_diff = (cols - crop_size[0]) / 2
    inputs_stacked = inputs_stacked[
        int(length_crop_diff):rows,
        int(width_crop_diff):int(cols - width_crop_diff)]
    print(inputs_stacked.shape)
    # tobytes() replaces the deprecated tostring() (removed in NumPy 2.0).
    encoded_inputs = inputs_stacked.tobytes()

    width = crop_size[0]
    height = crop_size[1]

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    x_c = []
    y_c = []
    w = []
    h = []
    angle = []
    sin_angle = []
    cos_angle = []
    classes = []
    classes_text = []

    for idx, label_img in enumerate(labels_image):
        label = label_data[idx]

        # Axis-aligned extent of the object, truncated to whole pixels.
        x_lo = int(min(label_img[0]))
        x_hi = int(max(label_img[0]))
        y_lo = int(min(label_img[1]))
        y_hi = int(max(label_img[1]))
        xmin.append(x_lo / width)
        ymin.append(y_lo / height)
        xmax.append(x_hi / width)
        ymax.append(y_hi / height)
        x_c.append((x_lo + x_hi) / (2 * width))
        y_c.append((y_lo + y_hi) / (2 * height))

        angle_rad = label.ry
        # Exact radians -> degrees (the previous 180 / 3.141 was slightly off).
        angle.append(math.degrees(angle_rad))
        # Doubling the angle makes the encoding invariant to a 180 degree flip.
        sin_angle.append(math.sin(2 * angle_rad))
        cos_angle.append(math.cos(2 * angle_rad))

        vec_s_x = math.cos(angle_rad)
        vec_s_y = math.sin(angle_rad)
        resolution = params['batch_processor']['resolution']

        # Object width in pixels, scaled by an angle-dependent normalisation
        # against the crop dimensions.
        w_p = label.w / resolution
        w_p_s = w_p * math.sqrt(vec_s_x * vec_s_x / (height * height) +
                                vec_s_y * vec_s_y / (width * width))
        w.append(w_p_s)

        # Object length, normalised the same way with the axes swapped.
        l_p = label.l / resolution
        l_p_s = l_p * math.sqrt(vec_s_x * vec_s_x / (width * width) +
                                vec_s_y * vec_s_y / (height * height))
        h.append(l_p_s)

        class_name = label.type
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    encoded_name = (image_prefix + '.png').encode('utf8')
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/channels':
            dataset_util.int64_feature(inputs_stacked.shape[2]),
        'image/encoded': dataset_util.bytes_feature(encoded_inputs),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/bboxrot/x_c': dataset_util.float_list_feature(x_c),
        'image/object/bboxrot/y_c': dataset_util.float_list_feature(y_c),
        'image/object/bboxrot/w': dataset_util.float_list_feature(w),
        'image/object/bboxrot/h': dataset_util.float_list_feature(h),
        'image/object/bboxrot/angle':
            dataset_util.float_list_feature(angle),
        'image/object/bboxrot/sin_angle':
            dataset_util.float_list_feature(sin_angle),
        'image/object/bboxrot/cos_angle':
            dataset_util.float_list_feature(cos_angle),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    return example
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Convert one COCO image record and its annotations to a tf.Example.

    COCO stores boxes as absolute `[x, y, width, height]` with `(x, y)` the
    top-left corner. They are written here as separate xmin/xmax/ymin/ymax
    lists normalised by the image size taken from the `image` record.

    Args:
        image: dict with (at least) the keys 'file_name', 'height', 'width'
            and 'id'.
        annotations_list: list of dicts with (at least) the keys 'bbox',
            'iscrowd', 'category_id', 'area' and, when masks are requested,
            'segmentation'.
        image_dir: directory containing the image files.
        category_index: dict keyed by category id whose values contain a
            'name' entry.
        include_masks: whether to add PNG-encoded instance masks to the
            result. Default: False.

    Returns:
        key: hex SHA-256 digest of the encoded image bytes.
        example: the converted tf.Example.
        num_annotations_skipped: number of annotations ignored because the
            box has non-positive size or extends past the right/bottom edge.
    """
    img_h = image['height']
    img_w = image['width']
    filename = image['file_name']
    image_id = image['id']

    with tf.gfile.GFile(os.path.join(image_dir, filename), 'rb') as fid:
        encoded_jpg = fid.read()
    # Opening the bytes with PIL makes an unreadable image fail here.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin, xmax, ymin, ymax = [], [], [], []
    is_crowd, category_names, category_ids, area = [], [], [], []
    encoded_mask_png = []
    num_annotations_skipped = 0

    for annotation in annotations_list:
        x, y, box_w, box_h = tuple(annotation['bbox'])
        degenerate = box_w <= 0 or box_h <= 0
        if degenerate or x + box_w > img_w or y + box_h > img_h:
            num_annotations_skipped += 1
            continue

        xmin.append(float(x) / img_w)
        xmax.append(float(x + box_w) / img_w)
        ymin.append(float(y) / img_h)
        ymax.append(float(y + box_h) / img_h)
        is_crowd.append(annotation['iscrowd'])
        category_id = int(annotation['category_id'])
        category_ids.append(category_id)
        # Collected (and thereby validated against category_index) but not
        # written to the feature dict below.
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(annotation['area'])

        if include_masks:
            rle = mask.frPyObjects(annotation['segmentation'], img_h, img_w)
            binary_mask = mask.decode(rle)
            if not annotation['iscrowd']:
                # Merge the per-part masks into a single 2-D mask.
                binary_mask = np.amax(binary_mask, axis=2)
            png_buffer = io.BytesIO()
            PIL.Image.fromarray(binary_mask).save(png_buffer, format='PNG')
            encoded_mask_png.append(png_buffer.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return key, example, num_annotations_skipped
def create_tf_example(root, image_file):
    """Build a tf.Example for one JPEG image and its polygon annotations.

    Annotations come from `get_cordinates`, looked up by the image file name
    without its extension. Each annotation is `(class_name, coordinates)`
    where `coordinates` is a sequence of dicts with 'x' and 'y' entries; the
    axis-aligned bounding box of those points is stored, normalised by the
    decoded image size.

    An image without annotations yields an example with empty box and class
    lists.

    Args:
        root: Directory containing the image.
        image_file: File name of the image inside `root`.

    Returns:
        The populated tf.train.Example.

    Raises:
        ValueError: If the file is not a JPEG image.
    """
    annotation_key, _ = os.path.splitext(image_file)

    # Read the file once; the same bytes are decoded for validation and
    # embedded in the example.
    with tf.gfile.GFile(os.path.join(root, image_file), 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    width, height = image.size
    print(image_file, width, height)

    xmin = []
    ymin = []
    ymax = []
    xmax = []
    classes_id = []
    classes_text = []

    annotations = get_cordinates(annotation_key)
    if annotations:
        for item in annotations:
            class_name = item[0]
            coordinates = item[1]
            xs = [coord['x'] for coord in coordinates]
            ys = [coord['y'] for coord in coordinates]
            # Cast before dividing so integer pixel coordinates are never
            # truncated by integer division.
            xmin.append(float(min(xs)) / width)
            xmax.append(float(max(xs)) / width)
            ymin.append(float(min(ys)) / height)
            ymax.append(float(max(ys)) / height)
            classes_id.append(get_class_id_4_classes(class_name))
            classes_text.append(class_name.encode('utf8'))
        print('CLASSES_TEXT_after', classes_text)

    encoded_name = image_file.encode('utf8')
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes_id),
    }))
    return tf_example
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Convert one COCO-style image record and its annotations to a tf.Example.

    Boxes are given as absolute `[x, y, width, height]` and written as
    xmin/xmax/ymin/ymax normalised by the size stored in the `image` record.
    Annotations whose box has non-positive size or extends past the
    right/bottom image edge are dropped.

    Args:
        image: dict with the keys 'height', 'width', 'file_name' and 'id'.
        annotations_list: list of dicts with the keys 'bbox', 'iscrowd',
            'category_id', 'area' and, when masks are requested,
            'segmentation' (passed directly to `mask.decode`).
        image_dir: directory containing the image files.
        category_index: dict keyed by category id whose values contain a
            'name' entry.
        include_masks: whether to add PNG-encoded instance masks. When
            `FLAGS.dump_masks` is set, each mask is additionally scaled to
            0/255 and written to `FLAGS.train_image_dir`.

    Returns:
        The converted tf.Example.
    """
    img_h = image['height']
    img_w = image['width']
    filename = image['file_name']
    image_id = image['id']

    with tf.gfile.GFile(os.path.join(image_dir, filename), 'rb') as fid:
        encoded_jpg = fid.read()
    # Opening the bytes with PIL makes an unreadable image fail here.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin, xmax, ymin, ymax = [], [], [], []
    is_crowd, category_names, category_ids, area = [], [], [], []
    encoded_mask_png = []
    num_annotations_skipped = 0

    for idx, annotation in enumerate(annotations_list):
        x, y, box_w, box_h = tuple(annotation['bbox'])
        degenerate = box_w <= 0 or box_h <= 0
        if degenerate or x + box_w > img_w or y + box_h > img_h:
            num_annotations_skipped += 1
            continue

        xmin.append(float(x) / img_w)
        xmax.append(float(x + box_w) / img_w)
        ymin.append(float(y) / img_h)
        ymax.append(float(y + box_h) / img_h)
        is_crowd.append(annotation['iscrowd'])
        category_id = int(annotation['category_id'])
        category_ids.append(category_id)
        # Collected (and thereby validated against category_index) but not
        # written to the feature dict below.
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(annotation['area'])

        if not include_masks:
            continue

        binary_mask = mask.decode(annotation['segmentation'])
        mask_shape = binary_mask.shape
        # Flatten column-major, then refill row-major with the same shape.
        flat_mask = np.ravel(binary_mask, order='F')
        binary_mask = flat_mask.reshape(mask_shape, order='C')

        dump_to_disk = FLAGS.dump_masks
        if dump_to_disk:
            # Stretch to 0/255 so the dumped mask is visible in a viewer;
            # the same scaled mask is what gets embedded below.
            binary_mask[binary_mask > 0] = 255
        pil_image = PIL.Image.fromarray(binary_mask)
        if dump_to_disk:
            pil_image.save(FLAGS.train_image_dir + "/mask_" +
                           str(image_id) + "_" + str(idx) + ".png")

        png_buffer = io.BytesIO()
        pil_image.save(png_buffer, format='PNG')
        encoded_mask_png.append(png_buffer.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return example
def lab_format_to_tf_example(self, json_content):
    """Turn one lab-format annotation record into a tf.Example.

    Args:
        json_content: dict with the keys 'folder', 'filename' and
            'annotations'. Each annotation is a dict with 'label', 'left',
            'right', 'top' and 'bottom'.

    Returns:
        The populated tf.train.Example. Annotations whose label is not in
        `self.classes` are left out.
    """
    in_img_path = os.path.join(json_content["folder"],
                               json_content["filename"])
    with tf.gfile.GFile(in_img_path, 'rb') as fid:
        encoded_img = fid.read()

    # Only the size part of the image info is needed here.
    size, _ = image.get_img_info(in_img_path)
    width, height = int(size[0]), int(size[1])

    filename = json_content['filename'].encode('utf8')
    # Image format is the file extension without the dot, e.g. 'jpg'.
    extension = os.path.splitext(json_content['filename'])[1]
    image_format = '{}'.format(extension.replace('.', '')).encode()

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for anno in json_content['annotations']:
        if anno['label'] in self.classes:
            xmins.append(anno['left'] / width)
            xmaxs.append(anno['right'] / width)
            ymins.append(anno['top'] / height)
            ymaxs.append(anno['bottom'] / height)
            classes_text.append(anno['label'].encode('utf8'))
            classes.append(self.class_to_int(anno['label']))

    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_img),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def create_tf_example(data, label_map_dict, image_dir, ignore_occluded=False):
    """Convert a YAML-derived dict for one PNG image to a tf.Example proto.

    Box coordinates are normalised against a fixed 1280x720 frame; the size
    of the decoded image is not consulted.

    Args:
        data: dict for a single image with the keys 'path' and 'boxes'. Each
            box is a dict with 'x_min', 'y_min', 'x_max', 'y_max', 'label'
            and 'occluded'.
        label_map_dict: A map from string label names to integer ids.
        image_dir: Directory that `data['path']` is relative to.
        ignore_occluded: Whether to skip boxes flagged as occluded
            (default: False).

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image pointed to by data['path'] is not a PNG.
    """
    filename = data['path']
    with tf.gfile.GFile(os.path.join(image_dir, filename), 'rb') as fid:
        encoded_png = fid.read()
    if PIL.Image.open(io.BytesIO(encoded_png)).format != 'PNG':
        raise ValueError('Image format not PNG')
    key = hashlib.sha256(encoded_png).hexdigest()

    width, height = 1280, 720

    kept_boxes = [
        obj for obj in data['boxes']
        if not (ignore_occluded and obj['occluded'])
    ]
    xmin = [float(obj['x_min']) / width for obj in kept_boxes]
    ymin = [float(obj['y_min']) / height for obj in kept_boxes]
    xmax = [float(obj['x_max']) / width for obj in kept_boxes]
    ymax = [float(obj['y_max']) / height for obj in kept_boxes]
    classes_text = [obj['label'].encode('utf8') for obj in kept_boxes]
    classes = [label_map_dict[obj['label']] for obj in kept_boxes]

    encoded_name = filename.encode('utf8')
    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def create_tf_example(annotation_dict_key, annotation_dict_value, image_path):
    """Generate a tf.Example for one image and its boxes.

    Args:
        annotation_dict_key: Image file name, relative to `image_path`.
        annotation_dict_value: Space-separated annotation string made of
            repeated groups of five fields:
            `class_id xmin ymin xmax ymax` (absolute pixel coordinates).
        image_path: Directory containing the image.

    Returns:
        The populated tf.train.Example, or None when the number of fields in
        `annotation_dict_value` is not a multiple of five (the image is then
        excluded and never read).
    """
    image_format = b'png'  # change to jpg or jpeg if required
    fields_per_box = 5

    bboxes_class = annotation_dict_value.split(" ")
    if len(bboxes_class) % fields_per_box != 0:
        # Exclude the annotated data and image if the length is incorrect.
        return None
    print(bboxes_class)

    with tf.io.gfile.GFile(os.path.join(image_path, annotation_dict_key),
                           'rb') as fid:
        encoded_png = fid.read()
    key = hashlib.sha256(encoded_png).hexdigest().encode('utf8')
    width, height = Image.open(io.BytesIO(encoded_png)).size
    filename = annotation_dict_key.encode('utf8')

    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    for start in range(0, len(bboxes_class), fields_per_box):
        class_id, x_lo, y_lo, x_hi, y_hi = (
            bboxes_class[start:start + fields_per_box])
        classes.append(int(class_id))
        xmins.append(np.float32(x_lo) / width)
        # y coordinates are normalised by the image height (they were
        # previously divided by the width, which is wrong for any
        # non-square image).
        ymins.append(np.float32(y_lo) / height)
        xmaxs.append(np.float32(x_hi) / width)
        ymaxs.append(np.float32(y_hi) / height)
    classes_text = list(map(int_to_class_label, classes))

    # Fundamentally, a tf.Example is a {"string": tf.train.Feature} mapping.
    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        'image/key/sha256': dataset_util.bytes_feature(key),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return tf_example
def json_to_tf_example(json_data, dataset_directory, label_map_dict):
    """Convert one JSON annotation record to a tf.Example proto.

    The image is read from `<FLAGS.data_dir>/photos/<filename>`. Box
    coordinates are normalised by the size of the decoded image; the
    right/bottom edges are additionally clamped to [0, 1].

    Args:
        json_data: dict with the keys "filename" and "bndboxes". Each box is
            a dict with "x", "y", "w", "h" (absolute pixels, top-left corner
            plus size) and "id" (the class name).
        dataset_directory: Unused; kept for interface compatibility. The
            image location comes from FLAGS.data_dir.
        label_map_dict: A map from string label names to integer ids.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image named by json_data["filename"] is not a
            JPEG.
    """
    filename = json_data.get("filename")
    full_path = os.path.join(FLAGS.data_dir, "photos", filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # The decoded image is the single source of truth for the dimensions.
    width, height = image.size

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # A record without boxes produces an example with empty lists.
    for obj in json_data.get("bndboxes") or []:
        class_name = obj.get("id")
        difficult_obj.append(0)
        xmin.append(float(obj.get("x")) / width)
        ymin.append(float(obj.get("y")) / height)
        # Boxes may run past the image border; clamp the far edges.
        xmax.append(numpy.clip(
            float(obj.get("x") + obj.get("w")) / width, 0, 1))
        ymax.append(numpy.clip(
            float(obj.get("y") + obj.get("h")) / height, 0, 1))
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(0)
        # bytes_list_feature needs bytes, not str, on Python 3.
        poses.append('Unspecified'.encode('utf8'))

    encoded_name = filename.encode('utf8')
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
def _load_example_metadata(cursor, ids, images_dir):
    """Collect image size, metadata and labels for every id in `ids`."""
    examples = []
    for example_id in ids:
        # Image dimensions come from the decoded image itself.
        img_path = images_dir + '/' + str(example_id) + '.jpg'
        img = cv.imread(img_path, cv.IMREAD_COLOR)
        if img is None:
            # cv.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: ' + img_path)
        height, width, _ = img.shape

        cursor.execute("SELECT * FROM TrainingExample WHERE id=?",
                       (example_id, ))
        meta = cursor.fetchone()
        example = {
            'id': meta[0],
            'img_path': img_path,
            'img_height': height,
            'img_width': width,
            'camera_angle': meta[2],
            'camera_height': meta[3],
            'light_angle': meta[4],
            'labels': []
        }

        cursor.execute("SELECT * FROM Label WHERE image_id=?",
                       (example_id, ))
        for label_row in cursor.fetchall():
            # Columns 2/3 and 4/5 hold the two x and the two y coordinates in
            # no guaranteed order, so sort each pair.
            example['labels'].append({
                'id': label_row[0],
                'img_id': label_row[1],
                'xmin': min(label_row[2], label_row[3]),
                'xmax': max(label_row[2], label_row[3]),
                'ymin': min(label_row[4], label_row[5]),
                'ymax': max(label_row[4], label_row[5])
            })
        examples.append(example)
    return examples


def create_tf_examples(ids, images_dir, db_path):
    """Generate tf.Example entries for a given set of training examples.

    Only images that have exactly one label in the database are converted;
    all others are skipped. Every box is written with the class text
    b'rock' and the class id 1.

    Args:
        ids: list of training example IDs; image `<id>.jpg` is expected in
            `images_dir`.
        images_dir: absolute path to the directory containing the images.
        db_path: absolute path to the SQLite database with the
            `TrainingExample` and `Label` tables.

    Returns:
        A list of tf.train.Example protos.

    Raises:
        IOError: if an image cannot be read.
    """
    # Close the cursor and connection even when a query or image read fails.
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        try:
            examples = _load_example_metadata(cursor, ids, images_dir)
        finally:
            cursor.close()
    finally:
        conn.close()

    tf_examples = []
    for example in examples:
        # Filter before touching the file so skipped images are never read.
        if len(example['labels']) != 1:
            continue

        with tf.gfile.GFile(example['img_path'], 'rb') as fid:
            encoded_image_data = fid.read()

        # Normalise label coordinates by the image size.
        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []
        classes_text = []
        classes = []
        for label in example['labels']:
            xmins.append(label['xmin'] / example['img_width'])
            xmaxs.append(label['xmax'] / example['img_width'])
            ymins.append(label['ymin'] / example['img_height'])
            ymaxs.append(label['ymax'] / example['img_height'])
            classes_text.append(b'rock')
            classes.append(1)

        encoded_path = example['img_path'].encode('utf8')
        tf_examples.append(
            tf.train.Example(features=tf.train.Features(feature={
                'image/height':
                    dataset_util.int64_feature(example['img_height']),
                'image/width':
                    dataset_util.int64_feature(example['img_width']),
                'image/filename': dataset_util.bytes_feature(encoded_path),
                'image/source_id': dataset_util.bytes_feature(encoded_path),
                'image/encoded':
                    dataset_util.bytes_feature(encoded_image_data),
                'image/format': dataset_util.bytes_feature(b'jpeg'),
                'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
            })))

    return tf_examples
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
        data: dict holding PASCAL XML fields for a single image (obtained by
            running dataset_util.recursive_parse_xml_to_dict). An image
            without any annotated object (no 'object' key) yields an example
            with empty box lists.
        dataset_directory: Path to root directory holding PASCAL dataset.
        label_map_dict: A map from string label names to integer ids.
        ignore_difficult_instances: Whether to skip difficult instances in
            the dataset (default: False).
        image_subdirectory: String specifying subdirectory within the PASCAL
            dataset directory holding the actual image data.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image pointed to by data['filename'] is not a
            valid JPEG.
    """
    img_path = os.path.join(data['folder'], image_subdirectory,
                            data['filename'])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Normalisation uses the size recorded in the annotation.
    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # The 'object' key is absent when the annotation lists no objects.
    for obj in data.get('object', []):
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        bndbox = obj['bndbox']
        difficult_obj.append(int(difficult))
        xmin.append(float(bndbox['xmin']) / width)
        ymin.append(float(bndbox['ymin']) / height)
        xmax.append(float(bndbox['xmax']) / width)
        ymax.append(float(bndbox['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    encoded_name = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
def dict_to_tf_example(data,
                       image_subdirectory='JPEGImages',
                       dataset_directory='/home/lion/dataset'):
    """Convert a single-box annotation record to a tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict describing one image with exactly one bounding box. The keys
        read here are 'file', 'width', 'height', 'bbox' (with 'x1', 'y1',
        'x2', 'y2'), 'category_class', 'category_name' and '_id'.
      image_subdirectory: Not used by this function; kept so existing callers
        keep working.
      dataset_directory: Directory that data['file'] is resolved against.
        Defaults to the path that used to be hard-coded.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['file'] is not a valid JPEG
    """
    full_path = os.path.join(dataset_directory, data['file'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['width'])
    height = int(data['height'])
    category = int(data['category_class'])
    bbox = data['bbox']

    # Convert before applying the offset so string-valued coordinates work
    # the same way for y as they already did for x.
    y1 = float(bbox['y1'])
    y2 = float(bbox['y2'])
    if category == 2:
        # Exception for the "bottom" category: push the top edge down 8 px.
        y1 += 8
    if category == 1:
        # Exception for the "top" category: pull the bottom edge up 8 px.
        y2 -= 8

    xmin = [float(bbox['x1']) / width]
    xmax = [float(bbox['x2']) / width]
    ymin = [y1 / height]
    ymax = [y2 / height]
    classes_text = [data['category_name'].encode('utf8')]
    classes = [category]
    # The source records carry no such metadata, so constants are written.
    difficult = [0]
    truncated = [0]
    poses = ['Frontal'.encode('utf8')]

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(data['file'].encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(data['_id']).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
def create_tf_example(image_path, img, xmin, ymin, xmax, ymax, classes,
                      classes_text=(), truncated=(), poses=(),
                      difficult_obj=(), source_id='',
                      coordinate_normalize=False):
    """Create a tf.Example instance for one image and its boxes.

    :param image_path: path of the image file whose bytes are embedded
    :param img: decoded image; only ``img.shape[0]`` (height) and
        ``img.shape[1]`` (width) are read
    :param xmin: left edges of the bounding boxes (one per box)
    :param ymin: top edges of the bounding boxes
    :param xmax: right edges of the bounding boxes
    :param ymax: bottom edges of the bounding boxes
    :param classes: integer class ids (one per box)
    :param classes_text: class names as bytes (one per box)
    :param truncated: per-box truncated flags
    :param poses: per-box pose strings as bytes
    :param difficult_obj: per-box difficult flags
    :param source_id: source identifier; its basename is stored
    :param coordinate_normalize: when true, divide x coordinates by the image
        width and y coordinates by the image height before storing them
    :return: the populated tf.Example
    """
    def _scaled(values, extent):
        # Normalize absolute coordinates by the matching image dimension.
        return [float(value) / extent for value in values]

    with tf.gfile.GFile(image_path, 'rb') as image_file:
        image_bytes = image_file.read()
    digest = hashlib.sha256(image_bytes).hexdigest()

    img_height = img.shape[0]
    img_width = img.shape[1]

    if coordinate_normalize:
        xmin = _scaled(xmin, img_width)
        xmax = _scaled(xmax, img_width)
        ymin = _scaled(ymin, img_height)
        ymax = _scaled(ymax, img_height)

    feature = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename': dataset_util.bytes_feature(
            os.path.basename(image_path).encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            os.path.basename(source_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(example, label_map_dict):
    """Build a tf.Example with boxes and instance masks for one JPEG image.

    Every mask image returned by ``get_mask_paths(example)`` is binarized and
    split into connected components; each component covering more than 2000
    pixels becomes one object with its own box and PNG-encoded mask.

    Args:
      example: path of the JPEG image; also stored as filename/source id.
      label_map_dict: map from label name (the keys of the mask-path dict) to
        integer class id.

    Returns:
      The populated tf.train.Example.

    Raises:
      ValueError: if the image is not a JPEG.
      KeyError: if a label is missing from label_map_dict.
    """
    mask_paths = get_mask_paths(example)
    with tf.io.gfile.GFile(example, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    labels_found = []
    masks = []
    bboxes = []
    for label, mask_path in mask_paths.items():
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
        mask = cv2.threshold(mask, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        num_components, component_map = cv2.connectedComponents(mask)
        # The first component is the background, so we skip it
        for component_id in range(1, num_components):
            cardmask = np.zeros(component_map.shape, dtype=np.uint8)
            cardmask[component_map == component_id] = 1
            area = int(np.sum(cardmask))
            if area > 2000:
                labels_found.append(label)
                masks.append(cardmask)
                bboxes.append(bounding_box(cardmask))
            else:
                logging.info(
                    "%s: object %s discarded, item too small. Size %d",
                    example, label, area)

    width, height = image.size

    # NOTE(review): bounding_box is assumed to return
    # (ymin, ymax, xmin, xmax), which is how the indices are used here -
    # confirm against its definition.
    xmins = [bb[2] / width for bb in bboxes]
    xmaxs = [bb[3] / width for bb in bboxes]
    # Vertical coordinates must be normalized by the image height; dividing
    # by the width skewed every box on non-square images.
    ymins = [bb[0] / height for bb in bboxes]
    ymaxs = [bb[1] / height for bb in bboxes]

    # Materialize as lists: a lazy map object can only be consumed once.
    classes_text = [name.encode('utf8') for name in labels_found]
    classes = [label_map_dict[name] for name in labels_found]

    encoded_mask_png_list = []
    for instance_mask in masks:
        output = io.BytesIO()
        PIL.Image.fromarray(instance_mask).save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())

    filename = example.encode('utf8')
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/mask':
            dataset_util.bytes_list_feature(encoded_mask_png_list),
    }))
    return tf_example
def create_example(xml_file):
    """Build a tf.Example from one annotation XML file and its JPEG image.

    Child elements are addressed by position: the first two children of
    <size> are read as width and height, the first child of each <object> as
    the class name and the fifth child as the box (xmin, ymin, xmax, ymax).

    Args:
      xml_file: path (or file object) accepted by ``ET.parse``.

    Returns:
      The populated tf.train.Example.

    Raises:
      ValueError: if a class name is not one of the known labels, or the
        image is not a JPEG.
    """
    # Text label -> integer id.
    label_ids = {'lc': 1, 'd': 2, 'e': 3, 'f': 4,
                 'g': 5, 'a': 6, 'b': 7, 'hc': 8}

    root = ET.parse(xml_file).getroot()

    image_name = root.find('filename').text
    file_name = image_name.encode('utf8')
    size = root.find('size')
    width = int(size[0].text)
    height = int(size[1].text)

    xmin, ymin, xmax, ymax = [], [], [], []
    classes, classes_text = [], []
    truncated, poses, difficult_obj = [], [], []

    for member in root.findall('object'):
        classes_text.append(member[0].text.encode('utf8'))
        box = member[4]
        xmin.append(float(box[0].text) / width)
        ymin.append(float(box[1].text) / height)
        xmax.append(float(box[2].text) / width)
        ymax.append(float(box[3].text) / height)
        difficult_obj.append(0)

        label = member[0].text
        if label not in label_ids:
            raise ValueError("Class name invalid")
        classes.append(label_ids[label])

        # The XML is not consulted for these; constants are written.
        truncated.append(0)
        poses.append('Unspecified'.encode('utf8'))

    # Images are looked up in this fixed directory.
    full_path = os.path.join(
        '../Documents/GitHub/ump-poc/TensorFlow/workspace/training_demo/images/train',
        '{}'.format(image_name))

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    key = hashlib.sha256(encoded_jpg).hexdigest()

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(file_name),
        'image/source_id': dataset_util.bytes_feature(file_name),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(example):
    """Convert one cars196 example into a TF Object Detection tf.Example.

    Args:
      example: A dictionary with at least these keys:
        'image': image tensor of shape `[height, width, 3]`; it is passed to
          `tf.image.encode_jpeg`.
        'bbox': tensor of shape `[4]` holding the normalized box in the
          order `[ymin, xmin, ymax, xmax]`.
        'label': tensor holding the zero-based class index.

    Returns:
      The populated tf.train.Example.
    """
    image = example['image']
    height, width = image.shape[:-1]
    # The image does not come from a file, so no filename is recorded.
    filename = b''
    jpeg_bytes = tf.image.encode_jpeg(image).numpy()

    bbox = example['bbox']
    ymins, xmins, ymaxs, xmaxs = [bbox[0]], [bbox[1]], [bbox[2]], [bbox[3]]

    # Labels are stored as [0, 195]; the label map expects [1, 196].
    label = example['label'].numpy() + 1
    class_name = CATEGORY_INDEX[label]['name'].encode('utf-8')

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(b'jpeg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature([class_name]),
        'image/object/class/label': dataset_util.int64_list_feature([label]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Turn a parsed PASCAL annotation dict into a tf.Example proto.

    Box coordinates from the raw data are normalized by the image size
    recorded in the annotation.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). A missing 'object'
        key yields an example without boxes.
      dataset_directory: Path to root directory holding PASCAL dataset
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG
    """
    relative_path = os.path.join(data['folder'], image_subdirectory,
                                 data['filename'])
    with tf.gfile.GFile(os.path.join(dataset_directory, relative_path),
                        'rb') as fid:
        encoded_jpg = fid.read()
    if PIL.Image.open(io.BytesIO(encoded_jpg)).format != 'JPEG':
        raise ValueError('Image format not JPEG')
    digest = hashlib.sha256(encoded_jpg).hexdigest()

    img_width = int(data['size']['width'])
    img_height = int(data['size']['height'])

    xmin, ymin, xmax, ymax = [], [], [], []
    classes, classes_text = [], []
    truncated, poses, difficult_obj = [], [], []

    annotated = data['object'] if 'object' in data else []
    for obj in annotated:
        is_difficult = bool(int(obj['difficult']))
        if is_difficult and ignore_difficult_instances:
            continue

        difficult_obj.append(int(is_difficult))
        xmin.append(float(obj['bndbox']['xmin']) / img_width)
        ymin.append(float(obj['bndbox']['ymin']) / img_height)
        xmax.append(float(obj['bndbox']['xmax']) / img_width)
        ymax.append(float(obj['bndbox']['ymax']) / img_height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    feature = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(example):
    """Build a tf.Example from an image resized to 360x640 (width x height).

    The image at example['img_path'] is decoded, resized and re-encoded as
    JPEG. Box coordinates are stored exactly as given.

    Args:
      example: dict with keys
        'img_path': path of the JPEG image,
        'boxes': sequence of boxes indexed as [xmin, ymin, xmax, ymax],
        'class': integer class id shared by all boxes (21 or 19).

    Returns:
      The populated tf.train.Example.
    """
    height = 640  # Image height after resizing
    width = 360  # Image width after resizing
    filename = example['img_path']

    # Context manager so the file handle is closed deterministically.
    with tf.gfile.GFile(filename, 'rb') as fid:
        raw_image = fid.read()

    # Build the resize ops in a throwaway graph and close the session, so
    # repeated calls neither grow the default graph nor leak sessions.
    with tf.Graph().as_default():
        decoded_image = tf.image.decode_jpeg(raw_image)
        decoded_image_resized = tf.cast(
            tf.image.resize_images(decoded_image, [height, width]), tf.uint8)
        encode_op = tf.image.encode_jpeg(decoded_image_resized)
        with tf.Session() as sess:
            encoded_image_data = sess.run(encode_op)

    image_format = b'jpeg'  # b'jpeg' or b'png'
    # Bytes features require bytes; encode text paths.
    filename_bytes = (filename if isinstance(filename, bytes)
                      else filename.encode('utf8'))

    boxes = example['boxes']
    xmins = [float(box[0]) for box in boxes]
    xmaxs = [float(box[2]) for box in boxes]
    ymins = [float(box[1]) for box in boxes]
    ymaxs = [float(box[3]) for box in boxes]

    # Every box gets the example-level class. NOTE(review): any class other
    # than 21 or 19 leaves the class lists empty while boxes are still
    # written - confirm whether such examples should be rejected instead.
    classes = []
    classes_text = []
    for class_id, class_mid in ((21, b'/m/01xq0k1'), (19, b'/m/03k3r')):
        if example['class'] == class_id:
            classes = [class_id] * len(boxes)
            classes_text = [class_mid] * len(boxes)
            break

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename_bytes),
        'image/source_id': dataset_util.bytes_feature(filename_bytes),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def dict_to_tf_example(data, image_dir, label_map_dict):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Arguments:
      data: dict holding XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). If it has no
        'object' key a notice is printed and the example has no boxes.
      image_dir: Path to image directory.
      label_map_dict: A map from string label names to integers ids.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image is not a JPEG.
      KeyError: if an object is malformed or its name is missing from
        label_map_dict.
    """
    full_path = os.path.join(image_dir, data['filename'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    # Guard only the lookup that signals "no annotations". Wrapping the
    # whole loop also swallowed unknown-label / malformed-box KeyErrors and
    # left the box lists one element longer than the class list.
    try:
        objects = data['object']
    except KeyError:
        print(data['filename'] + ' without objects!')
        objects = []

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    for obj in objects:
        # Resolve everything that can fail before mutating any list, so the
        # per-object lists always stay the same length.
        bndbox = obj['bndbox']
        name = obj['name']
        class_id = label_map_dict[name]
        box = (float(bndbox['xmin']) / width,
               float(bndbox['ymin']) / height,
               float(bndbox['xmax']) / width,
               float(bndbox['ymax']) / height)

        xmin.append(box[0])
        ymin.append(box[1])
        xmax.append(box[2])
        ymax.append(box[3])
        classes_text.append(name.encode('utf8'))
        classes.append(class_id)

    difficult_obj = [0] * len(classes)
    filename = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
    }))
    return example
def create_tf_example(image):
    """Build a tf.Example for one annotated image of fixed size 1280x960.

    Args:
      image: dict with 'filename' (path of the image file) and 'signs', an
        iterable of dicts carrying 'xmin', 'xmax', 'ymin', 'ymax' in pixels
        and 'name', which must be an entry of LABELS.

    Returns:
      The populated tf.train.Example.
    """
    # Bosch: every image is taken to have this size; the file is not probed.
    height = 960
    width = 1280

    image_path = image['filename'].encode()
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        image_bytes = image_file.read()
    format_bytes = 'jpg'.encode()

    # One entry per box; coordinates are normalized to the image size.
    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    names, class_ids = [], []
    for sign in image['signs']:
        xmins.append(float(sign['xmin'] / width))
        xmaxs.append(float(sign['xmax'] / width))
        ymins.append(float(sign['ymin'] / height))
        ymaxs.append(float(sign['ymax'] / height))
        names.append(sign['name'].encode())
        # Class ids are the 1-based position of the name in LABELS.
        class_ids.append(LABELS.index(sign['name']) + 1)

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(image_path),
        'image/source_id': dataset_util.bytes_feature(image_path),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        'image/format': dataset_util.bytes_feature(format_bytes),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(names),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

    Args:
      annotations_data_frame: Data frame containing the annotations for a
        single image. The columns ImageID, LabelName, XMin, XMax, YMin and
        YMax are read; IsGroupOf, IsOccluded, IsTruncated and IsDepiction
        are used when present.
      label_map: String to integer label map.
      encoded_image: The encoded image string

    Returns:
      The populated TF Example. Rows whose LabelName is not in label_map are
      dropped; if none remain, the per-object features are empty.
    """
    def _as_bytes(value):
        # Bytes features reject text strings; encode them, pass bytes as-is.
        return value if isinstance(value, bytes) else value.encode('utf8')

    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]

    image_id = annotations_data_frame.ImageID.iloc[0]
    tf_fields = standard_fields.TfExampleFields

    # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
    feature_map = {
        tf_fields.object_bbox_ymin:
            dataset_util.float_list_feature(filtered_data_frame.YMin.values),
        tf_fields.object_bbox_xmin:
            dataset_util.float_list_feature(filtered_data_frame.XMin.values),
        tf_fields.object_bbox_ymax:
            dataset_util.float_list_feature(filtered_data_frame.YMax.values),
        tf_fields.object_bbox_xmax:
            dataset_util.float_list_feature(filtered_data_frame.XMax.values),
        tf_fields.object_class_text:
            dataset_util.bytes_list_feature(
                [_as_bytes(name)
                 for name in filtered_data_frame.LabelName.values]),
        tf_fields.object_class_label:
            dataset_util.int64_list_feature(
                filtered_data_frame.LabelName.map(
                    lambda x: label_map[x]).values),
        tf_fields.filename:
            dataset_util.bytes_feature(
                _as_bytes('{}.jpg'.format(image_id))),
        tf_fields.source_id:
            dataset_util.bytes_feature(_as_bytes(image_id)),
        tf_fields.image_encoded:
            dataset_util.bytes_feature(encoded_image),
    }

    # Optional boolean annotation columns, stored as 0/1 integers. The field
    # attribute is looked up only when its column exists.
    optional_columns = (
        ('IsGroupOf', 'object_group_of'),
        ('IsOccluded', 'object_occluded'),
        ('IsTruncated', 'object_truncated'),
        ('IsDepiction', 'object_depiction'),
    )
    for column, field_name in optional_columns:
        if column in filtered_data_frame.columns:
            feature_map[getattr(tf_fields, field_name)] = (
                dataset_util.int64_list_feature(
                    filtered_data_frame[column].values.astype(int)))

    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def _create_tf_example(self, image_dir, image_filename):
    """Build a tf.Example for one image and its box annotation text file.

    The annotation file is ``<self.bbox_dir>/<image name>.txt``. Its first
    line is skipped; every following non-blank line is read as
    ``x1 y1 x2 y2 label`` in pixel coordinates.

    Args:
      image_dir: directory containing the image.
      image_filename: file name of the image inside image_dir.

    Returns:
      The populated tf.train.Example.

    Raises:
      ValueError: if a label is not in self.labels or a coordinate is not an
        integer.
    """
    image_name, image_format = os.path.splitext(image_filename)
    image_path = os.path.join(image_dir, image_filename)

    with open(image_path, 'rb') as image_file:
        image_data = image_file.read()

    # Context manager releases the file handle PIL keeps open.
    with Image.open(image_path) as img:
        width, height = img.size

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    annotation_path = os.path.join(self.bbox_dir, image_name + ".txt")
    with open(annotation_path, 'r') as annotation_file:
        next(annotation_file, None)  # skip the first (header) line
        for annotation_line in annotation_file:
            coords = annotation_line.split()
            if not coords:
                continue  # tolerate blank / trailing empty lines

            x1, y1, x2, y2 = (int(value) for value in coords[:4])
            annotation_label = coords[4]
            # NOTE(review): ids are the 0-based index into self.labels; the
            # Object Detection API reserves id 0 for background - confirm
            # self.labels accounts for that.
            annotation_class_id = self.labels.index(annotation_label)

            xmins.append(x1 / width)
            xmaxs.append(x2 / width)
            ymins.append(y1 / height)
            ymaxs.append(y2 / height)
            classes_text.append(annotation_label.encode('utf-8'))
            classes.append(annotation_class_id)

    filename_bytes = image_filename.encode('utf-8')
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename_bytes),
        'image/source_id': dataset_util.bytes_feature(filename_bytes),
        'image/encoded': dataset_util.bytes_feature(image_data),
        # Stored as the file extension including its leading dot.
        'image/format':
            dataset_util.bytes_feature(image_format.encode('utf-8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def dict_to_tf_example(data, dataset_directory, set_name, id, full_data):
    """Convert one image record and its annotations to a tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict describing a single image. The keys read here are
        'filename', 'width', 'height' and 'ann_ids'.
      dataset_directory: Path to the dataset root; the image is read from its
        'images' subdirectory.
      set_name: name of the set (training, validation or test). Not used by
        this function.
      id: Not used by this function.
      full_data: dict with an 'annotations' collection indexed by the entries
        of data['ann_ids'] and a 'categories' collection indexed by category
        id. Each annotation provides 'a_bbox' (read as xmin, ymin, xmax,
        ymax) and 'cat_id' (only the first entry is used).

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is neither a
        JPEG nor a PNG
    """
    full_path = os.path.join(dataset_directory, 'images', data['filename'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_image = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_image))
    if image.format not in ('JPEG', 'PNG'):
        raise ValueError('Image format not JPEG or PNG')
    key = hashlib.sha256(encoded_image).hexdigest()

    width = int(data['width'])
    height = int(data['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    for ann_id in data['ann_ids']:
        ann = full_data['annotations'][ann_id]
        bbox = ann['a_bbox']
        category_id = ann['cat_id'][0]

        xmin.append(float(bbox[0]) / width)
        ymin.append(float(bbox[1]) / height)
        xmax.append(float(bbox[2]) / width)
        ymax.append(float(bbox[3]) / height)
        classes_text.append(
            full_data['categories'][category_id]['name'].encode('utf8'))
        classes.append(int(category_id))

        # The source has no such metadata; placeholders keep every
        # per-object list the same length (difficult used to stay empty).
        difficult_obj.append(0)
        truncated.append(0)
        poses.append("Unspecified".encode('utf8'))

    filename = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format':
            dataset_util.bytes_feature(image.format.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). The 'object' key
        may be absent, in which case the example carries no boxes.
      mask_path: String path to PNG encoded mask.
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: String specifying subdirectory within the
        Pascal dataset directory holding the actual image data.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).
      faces_only: If True, generates bounding boxes for pet faces. Otherwise
        generates bounding boxes (as well as segmentations for full pet
        bodies).
      mask_type: 'numerical' or 'png'. 'png' is recommended because it leads
        to smaller file sizes.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG, or the mask is not a PNG
    """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    mask = PIL.Image.open(io.BytesIO(encoded_mask_png))
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')

    # Pixels different from 2 are foreground; collect the columns and rows
    # that contain any of them.
    mask_np = np.asarray(mask)
    nonbackground_indices_x = np.any(mask_np != 2, axis=0)
    nonbackground_indices_y = np.any(mask_np != 2, axis=1)
    nonzero_x_indices = np.where(nonbackground_indices_x)
    nonzero_y_indices = np.where(nonbackground_indices_y)

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    masks = []
    # Images without annotations have no 'object' entry; treat as empty.
    for obj in data.get('object', []):
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue
        difficult_obj.append(int(difficult))

        if faces_only:
            xmin = float(obj['bndbox']['xmin'])
            xmax = float(obj['bndbox']['xmax'])
            ymin = float(obj['bndbox']['ymin'])
            ymax = float(obj['bndbox']['ymax'])
        else:
            # Full-body box: tight extent of the foreground mask.
            xmin = float(np.min(nonzero_x_indices))
            xmax = float(np.max(nonzero_x_indices))
            ymin = float(np.min(nonzero_y_indices))
            ymax = float(np.max(nonzero_y_indices))

        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)
        class_name = get_class_name_from_filename(data['filename'])
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))
        if not faces_only:
            masks.append((mask_np != 2).astype(np.uint8))

    filename = data['filename'].encode('utf8')
    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }
    if not faces_only:
        if mask_type == 'numerical':
            # np.stack rejects an empty list, so emit an empty feature when
            # no object survived filtering.
            if masks:
                mask_stack = np.stack(masks).astype(np.float32)
                masks_flattened = np.reshape(mask_stack, [-1]).tolist()
            else:
                masks_flattened = []
            feature_dict['image/object/mask'] = (
                dataset_util.float_list_feature(masks_flattened))
        elif mask_type == 'png':
            encoded_mask_png_list = []
            for instance_mask in masks:
                output = io.BytesIO()
                PIL.Image.fromarray(instance_mask).save(output, format='PNG')
                encoded_mask_png_list.append(output.getvalue())
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return example
def dict_to_tf_example(data, label_map_dict, img_path):
    """Build a tf.Example for one JPEG image and its 'Panel' shapes.

    Only shapes whose block text starts with 'Panel' or 'panel' are kept; each
    one becomes a bounding box labelled 'Panel'. Box coordinates are divided by
    the width/height of the decoded image.

    Args:
      data: Nested mapping; shapes are read from
        data['Layers']['Layer']['Shapes']['Shape']. Each shape must provide
        shape['BlockText']['Text'].text and shape['Data']['Extent'].attrib
        with 'X', 'Y', 'Width' and 'Height' entries.
      label_map_dict: Mapping from class name to integer id. 'Panel' is looked
        up once per kept shape.
      img_path: Path of the JPEG image; also stored as filename and source id.

    Returns:
      The populated tf.train.Example.

    Raises:
      ValueError: If the file at img_path is not JPEG encoded.
    """
    with tf.gfile.GFile(img_path, 'rb') as image_file:
        jpg_bytes = image_file.read()
    pil_image = PIL.Image.open(io.BytesIO(jpg_bytes))
    if pil_image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    sha256_hex = hashlib.sha256(jpg_bytes).hexdigest()

    img_w = pil_image.width
    img_h = pil_image.height

    panel_label = 'Panel'
    boxes = []  # one (xmin, ymin, xmax, ymax) tuple per kept shape
    labels_text = []
    label_ids = []

    for shape in data['Layers']['Layer']['Shapes']['Shape']:
        caption = shape['BlockText']['Text'].text
        if not caption.startswith(('Panel', 'panel')):
            continue

        extent = shape['Data']['Extent'].attrib
        left = float(extent['X'])
        top = float(extent['Y'])
        box_w = float(extent['Width'])
        box_h = float(extent['Height'])

        box = (left / img_w,
               top / img_h,
               (left + box_w) / img_w,
               (top + box_h) / img_h)

        # Boxes that fall outside the image are reported (by printing the
        # image path) but are still written to the example.
        if box[0] < 0 or box[1] < 0 or box[2] > 1.01 or box[3] > 1.01:
            print(img_path)

        boxes.append(box)
        labels_text.append(panel_label.encode('utf8'))
        label_ids.append(label_map_dict[panel_label])

    encoded_path = img_path.encode('utf8')
    features = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        'image/filename': dataset_util.bytes_feature(encoded_path),
        'image/source_id': dataset_util.bytes_feature(encoded_path),
        'image/key/sha256':
            dataset_util.bytes_feature(sha256_hex.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature([b[0] for b in boxes]),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature([b[2] for b in boxes]),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature([b[1] for b in boxes]),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature([b[3] for b in boxes]),
        'image/object/class/text':
            dataset_util.bytes_list_feature(labels_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def prepare_example(image_path, annotations, label_map_dict):
    """Converts the annotations of one image into a tf.Example proto.

    Args:
      image_path: The complete path to the image.
      annotations: A dictionary of per-object annotation sequences keyed by
        field name ('type', 'truncated', 'alpha', '2d_bbox_*', '3d_bbox_*').
        The '2d_bbox_*' entries must support division by a float and len()
        (presumably numpy arrays -- confirm against the caller).
      label_map_dict: A map from string label names to integer ids.

    Returns:
      example: The converted tf.Example.
    """
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        image_bytes = image_file.read()
    pixels = np.asarray(pil.open(io.BytesIO(image_bytes)))
    sha256_hex = hashlib.sha256(image_bytes).hexdigest()

    img_width = int(pixels.shape[1])
    img_height = int(pixels.shape[0])

    # Scale the pixel-space 2D boxes by the image size.
    left = annotations['2d_bbox_left'] / float(img_width)
    top = annotations['2d_bbox_top'] / float(img_height)
    right = annotations['2d_bbox_right'] / float(img_width)
    bottom = annotations['2d_bbox_bottom'] / float(img_height)

    # Every object is written as "not difficult".
    not_difficult = [0] * len(left)
    print(image_path)

    encoded_path = image_path.encode('utf8')
    features = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename': dataset_util.bytes_feature(encoded_path),
        'image/source_id': dataset_util.bytes_feature(encoded_path),
        'image/key/sha256':
            dataset_util.bytes_feature(sha256_hex.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        # NOTE(review): the format is recorded as 'jpg' without inspecting the
        # file; confirm it matches the actual encoding of the images.
        'image/format': dataset_util.bytes_feature('jpg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [name.encode('utf8') for name in annotations['type']]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [label_map_dict[name] for name in annotations['type']]),
        'image/object/difficult':
            dataset_util.int64_list_feature(not_difficult),
    }

    # Remaining annotation fields are copied through unchanged as float lists.
    passthrough_fields = (
        ('image/object/truncated', 'truncated'),
        ('image/object/alpha', 'alpha'),
        ('image/object/3d_bbox/height', '3d_bbox_height'),
        ('image/object/3d_bbox/width', '3d_bbox_width'),
        ('image/object/3d_bbox/length', '3d_bbox_length'),
        ('image/object/3d_bbox/x', '3d_bbox_x'),
        ('image/object/3d_bbox/y', '3d_bbox_y'),
        ('image/object/3d_bbox/z', '3d_bbox_z'),
        ('image/object/3d_bbox/rot_y', '3d_bbox_rot_y'),
    )
    for feature_key, annotation_key in passthrough_fields:
        features[feature_key] = dataset_util.float_list_feature(
            annotations[annotation_key])

    return tf.train.Example(features=tf.train.Features(feature=features))
def create_tf_example(example):
    """Build a tf.train.Example from one annotated image record.

    The image dimensions are not read from the file: they are fixed at
    1280x720 (marked "Bosch" in the original code), and the format is always
    recorded as 'png'.

    Args:
      example: Mapping with 'path' (image file path) and 'boxes', an iterable
        of mappings carrying 'x_min', 'x_max', 'y_min', 'y_max' (divided by
        the fixed width/height) and 'label' (a key of the module-level
        LABEL_DICT).

    Returns:
      The populated tf.train.Example.
    """
    # Bosch: fixed image size
    img_height = 720
    img_width = 1280

    encoded_path = example['path'].encode()
    with tf.gfile.GFile(example['path'], 'rb') as image_file:
        image_bytes = image_file.read()
    format_bytes = 'png'.encode()

    # One entry per bounding box in each list below.
    left_edges = []    # normalized left x coordinates
    right_edges = []   # normalized right x coordinates
    top_edges = []     # normalized top y coordinates
    bottom_edges = []  # normalized bottom y coordinates
    label_names = []   # class names as bytes
    label_ids = []     # integer class ids

    for box in example['boxes']:
        left_edges.append(float(box['x_min'] / img_width))
        right_edges.append(float(box['x_max'] / img_width))
        top_edges.append(float(box['y_min'] / img_height))
        bottom_edges.append(float(box['y_max'] / img_height))
        label_names.append(box['label'].encode())
        label_ids.append(int(LABEL_DICT[box['label']]))

    features = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename': dataset_util.bytes_feature(encoded_path),
        'image/source_id': dataset_util.bytes_feature(encoded_path),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        'image/format': dataset_util.bytes_feature(format_bytes),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(left_edges),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(right_edges),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(top_edges),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(bottom_edges),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def main(_):
    """Write the label-map file, then build the train and test TFRecords.

    All inputs come from the `config` module:
      * CLASSES_FILE / CLASSES: output path and {label: id} mapping of the
        label map, written as `item { id, name }` entries.
      * ANNOTATION_PATH / BASE_PATH: annotation file (one header line, then
        one box per row) and the directory the image paths are relative to.
      * TEST_SIZE, TRAIN_RECORD, TEST_RECORD: split ratio and output paths.

    Each data row must contain exactly one comma; the part before it is split
    on ';' into image path, label, x1, y1, x2, y2 and one ignored field. Rows
    whose label is not in config.CLASSES are skipped. Box coordinates are
    divided by the image width/height before being written.

    Args:
      _: Unused (argument list handed over by the app runner).
    """
    # Write classes file
    classes_dir = os.path.dirname(config.CLASSES_FILE)
    if classes_dir:
        # dirname is '' for a bare file name, which os.makedirs rejects.
        os.makedirs(classes_dir, exist_ok=True)
    with open(config.CLASSES_FILE, "w") as classes_file:
        for label, class_id in config.CLASSES.items():
            classes_file.write(
                "item {\n\tid: " + str(class_id) + "\n\tname: '" + label
                + "'\n}\n")

    # Parse annotation file (closed as soon as it has been read).
    with open(config.ANNOTATION_PATH) as annotation_file:
        rows = annotation_file.read().strip().split("\n")

    # Maps image path -> list of (label, (x1, y1, x2, y2)).
    annotations = {}

    # Ignore first line header
    for row in rows[1:]:
        # Get image path and bounding box coordinates
        record, _ignored = row.split(",")
        img_path, label, x1, y1, x2, y2, _ignored = record.split(";")

        if label not in config.CLASSES:
            continue

        img_path = os.path.sep.join([config.BASE_PATH, img_path])
        box = tuple(map(float, [x1, y1, x2, y2]))

        # Add image path and bounding box information to dictionary
        annotations.setdefault(img_path, []).append((label, box))

    train_paths, test_paths = train_test_split(
        list(annotations.keys()),
        test_size=config.TEST_SIZE,
        random_state=42)

    datasets = [("train", train_paths, config.TRAIN_RECORD),
                ("test", test_paths, config.TEST_RECORD)]

    for _split, img_paths, output_path in datasets:
        print(f"[INFO] Building {output_path} ...")

        # The context manager closes the writer even if an image fails.
        with tf.python_io.TFRecordWriter(output_path) as writer:
            for img_path in tqdm.tqdm(img_paths):
                # Load the encoded image bytes
                with tf.gfile.GFile(img_path, "rb") as image_file:
                    encoded = image_file.read()

                # Get image size
                image = Image.open(io.BytesIO(encoded))
                width, height = image.size[:2]

                # Get filename and format (text after the last '.')
                filename = img_path.split(os.path.sep)[-1]
                img_format = filename[filename.rfind(".") + 1:]

                # Get bounding boxes
                xmins, xmaxs = [], []
                ymins, ymaxs = [], []
                classes_texts, classes = [], []
                for label, (x1, y1, x2, y2) in annotations[img_path]:
                    xmins.append(x1 / width)
                    xmaxs.append(x2 / width)
                    ymins.append(y1 / height)
                    ymaxs.append(y2 / height)
                    classes_texts.append(label.encode("utf8"))
                    classes.append(config.CLASSES[label])

                encoded_filename = filename.encode("utf8")
                features = tf.train.Features(
                    feature={
                        "image/height": dataset_util.int64_feature(height),
                        "image/width": dataset_util.int64_feature(width),
                        "image/filename":
                            dataset_util.bytes_feature(encoded_filename),
                        "image/source_id":
                            dataset_util.bytes_feature(encoded_filename),
                        'image/encoded': dataset_util.bytes_feature(encoded),
                        "image/format": dataset_util.bytes_feature(
                            img_format.encode("utf8")),
                        "image/object/bbox/xmin":
                            dataset_util.float_list_feature(xmins),
                        "image/object/bbox/xmax":
                            dataset_util.float_list_feature(xmaxs),
                        "image/object/bbox/ymin":
                            dataset_util.float_list_feature(ymins),
                        "image/object/bbox/ymax":
                            dataset_util.float_list_feature(ymaxs),
                        "image/object/class/text":
                            dataset_util.bytes_list_feature(classes_texts),
                        "image/object/class/label":
                            dataset_util.int64_list_feature(classes),
                    })

                example = tf.train.Example(features=features)
                writer.write(example.SerializeToString())
def prepare_example(image_path, annotations, label_map_dict):
    """Converts the annotations of one image into a tf.Example proto.

    Args:
      image_path: The complete path to the image.
      annotations: A dictionary of per-object annotation sequences keyed by
        field name ('type', 'truncated', 'alpha', '2d_bbox_*', '3d_bbox_*').
        The '2d_bbox_*' entries must support division by a float and len()
        (presumably numpy arrays -- confirm against the caller).
      label_map_dict: A map from string label names to integer ids.

    Returns:
      example: The converted tf.Example.
    """
    with tf.gfile.GFile(image_path, 'rb') as png_file:
        png_bytes = png_file.read()
    pixel_array = np.asarray(pil.open(io.BytesIO(png_bytes)))
    digest = hashlib.sha256(png_bytes).hexdigest()

    # Array shape is (rows, columns, ...), i.e. height first.
    n_cols = int(pixel_array.shape[1])
    n_rows = int(pixel_array.shape[0])

    # 2D boxes scaled by the image size.
    box_xmin = annotations['2d_bbox_left'] / float(n_cols)
    box_ymin = annotations['2d_bbox_top'] / float(n_rows)
    box_xmax = annotations['2d_bbox_right'] / float(n_cols)
    box_ymax = annotations['2d_bbox_bottom'] / float(n_rows)

    # Every object is written as "not difficult".
    difficult_flags = [0] * len(box_xmin)

    path_bytes = image_path.encode('utf8')
    float_list = dataset_util.float_list_feature

    feature = {
        'image/height': dataset_util.int64_feature(n_rows),
        'image/width': dataset_util.int64_feature(n_cols),
        'image/filename': dataset_util.bytes_feature(path_bytes),
        'image/source_id': dataset_util.bytes_feature(path_bytes),
        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(png_bytes),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': float_list(box_xmin),
        'image/object/bbox/xmax': float_list(box_xmax),
        'image/object/bbox/ymin': float_list(box_ymin),
        'image/object/bbox/ymax': float_list(box_ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [obj_type.encode('utf8') for obj_type in annotations['type']]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [label_map_dict[obj_type] for obj_type in annotations['type']]),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_flags),
        'image/object/truncated': float_list(annotations['truncated']),
        'image/object/alpha': float_list(annotations['alpha']),
    }

    # The 3D box fields map one-to-one onto annotation keys '3d_bbox_<name>'.
    for name in ('height', 'width', 'length', 'x', 'y', 'z', 'rot_y'):
        feature['image/object/3d_bbox/' + name] = float_list(
            annotations['3d_bbox_' + name])

    return tf.train.Example(features=tf.train.Features(feature=feature))
def createTfExample(singleFileData, path):
    """Build a tf.train.Example for one image and all of its boxes.

    Args:
      singleFileData: Object exposing `filename` (the image file name) and
        `object`, whose `iterrows()` yields one row per box with 'xmin',
        'xmax', 'ymin', 'ymax' and 'class' entries (presumably a pandas
        DataFrame -- confirm against the caller).
      path: Directory that contains the image file.

    Returns:
      The populated tf.train.Example. Box coordinates are divided by the
      image width/height and the format is always recorded as b'jpg'.
    """
    # read the raw encoded bytes of the image that belongs to this box data
    imagePath = os.path.join(path, '{}'.format(singleFileData.filename))
    with tf.gfile.GFile(imagePath, 'rb') as imageFile:
        encodedImage = imageFile.read()

    # PIL is only used to obtain the pixel dimensions of the image
    imageWidth, imageHeight = Image.open(io.BytesIO(encodedImage)).size

    encodedFileName = singleFileData.filename.encode('utf8')
    formatBytes = b'jpg'

    # per-box lists: normalized box edges plus the class as text and as int
    leftEdges, rightEdges = [], []
    topEdges, bottomEdges = [], []
    classNames, classIds = [], []

    # each row corresponds to one box
    for _, boxRow in singleFileData.object.iterrows():
        leftEdges.append(boxRow['xmin'] / imageWidth)
        rightEdges.append(boxRow['xmax'] / imageWidth)
        topEdges.append(boxRow['ymin'] / imageHeight)
        bottomEdges.append(boxRow['ymax'] / imageHeight)
        classNames.append(boxRow['class'].encode('utf8'))
        classIds.append(classAsTextToClassAsInt(boxRow['class']))

    featureDict = {
        'image/height': dataset_util.int64_feature(imageHeight),
        'image/width': dataset_util.int64_feature(imageWidth),
        'image/filename': dataset_util.bytes_feature(encodedFileName),
        'image/source_id': dataset_util.bytes_feature(encodedFileName),
        'image/encoded': dataset_util.bytes_feature(encodedImage),
        'image/format': dataset_util.bytes_feature(formatBytes),
        'image/object/bbox/xmin': dataset_util.float_list_feature(leftEdges),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rightEdges),
        'image/object/bbox/ymin': dataset_util.float_list_feature(topEdges),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(bottomEdges),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classNames),
        'image/object/class/label':
            dataset_util.int64_list_feature(classIds),
    }
    return tf.train.Example(features=tf.train.Features(feature=featureDict))
def create_tf_record(output_filename, num_shards, examples):
    """Write one tf.Example per existing image into sharded TFRecord files.

    Relies on two module-level names: `read_bucket` (directory joined with
    each example name) and `annotations` (mapping from example name to a list
    of box dicts with 'x', 'x2', 'y', 'y2' and 'label'). Box coordinates are
    written exactly as found in the annotations.

    Args:
      output_filename: Base path handed to open_sharded_output_tfrecords.
      num_shards: Number of output shards; example i goes to shard
        i % num_shards (i counts skipped examples too).
      examples: Iterable of image file names relative to `read_bucket`.
        Names that are not existing files are skipped.

    Raises:
      ValueError: If an existing image file is not JPEG encoded.
    """
    with contextlib2.ExitStack() as close_stack:
        shard_writers = tf_record_creation_util.open_sharded_output_tfrecords(
            close_stack, output_filename, num_shards)

        for position, name in enumerate(examples):
            image_file_path = os.path.join(read_bucket, name)
            if not os.path.isfile(image_file_path):
                continue

            with tf.gfile.GFile(image_file_path, 'rb') as image_file:
                jpg_bytes = image_file.read()
            pil_image = PIL.Image.open(io.BytesIO(jpg_bytes))
            if pil_image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            digest = hashlib.sha256(jpg_bytes).hexdigest()
            img_width, img_height = pil_image.size

            box_xmin, box_xmax = [], []
            box_ymin, box_ymax = [], []
            label_names = []  # 'coke', 'pepsi', 'coke'...
            for box in annotations[name]:
                box_xmin.append(box['x'])
                box_xmax.append(box['x2'])
                box_ymin.append(box['y'])
                box_ymax.append(box['y2'])
                label_names.append(box['label'].encode('utf8'))

            # Per-box constants: one entry for every box collected above.
            box_count = len(label_names)
            # temporary, labels still need to be mapped to actual ids
            label_ids = [1] * box_count
            difficult_flags = [0] * box_count
            truncated_flags = [0] * box_count
            views = [''.encode('utf8')] * box_count

            try:
                name_bytes = name.encode('utf8')
                features = {
                    'image/height': dataset_util.int64_feature(img_height),
                    'image/width': dataset_util.int64_feature(img_width),
                    'image/filename': dataset_util.bytes_feature(name_bytes),
                    'image/source_id': dataset_util.bytes_feature(name_bytes),
                    'image/key/sha256':
                        dataset_util.bytes_feature(digest.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(jpg_bytes),
                    'image/format':
                        dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin':
                        dataset_util.float_list_feature(box_xmin),
                    'image/object/bbox/xmax':
                        dataset_util.float_list_feature(box_xmax),
                    'image/object/bbox/ymin':
                        dataset_util.float_list_feature(box_ymin),
                    'image/object/bbox/ymax':
                        dataset_util.float_list_feature(box_ymax),
                    'image/object/class/text':
                        dataset_util.bytes_list_feature(label_names),
                    'image/object/class/label':
                        dataset_util.int64_list_feature(label_ids),
                    'image/object/difficult':
                        dataset_util.int64_list_feature(difficult_flags),
                    'image/object/truncated':
                        dataset_util.int64_list_feature(truncated_flags),
                    'image/object/view':
                        dataset_util.bytes_list_feature(views),
                }
                record = tf.train.Example(
                    features=tf.train.Features(feature=features))
                if record:
                    writer = shard_writers[position % num_shards]
                    writer.write(record.SerializeToString())
            except ValueError:
                print('Invalid example, ignoring.')
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). The 'object' key
        is optional; without it the example carries no boxes.
      mask_path: String path to PNG encoded mask.
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: String specifying subdirectory within the
        Pascal dataset directory holding the actual image data.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).
      faces_only: If True, generates bounding boxes for pet faces. Otherwise
        generates bounding boxes (as well as segmentations for full pet
        bodies).
      mask_type: 'numerical' or 'png'. 'png' is recommended because it leads
        to smaller file sizes. Any other value writes no mask feature.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG, or the mask is not a PNG.
    """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    mask = PIL.Image.open(io.BytesIO(encoded_mask_png))
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')

    # Pixel value 2 is treated as background; the columns/rows that contain
    # any other value give the extent of the full-body bounding box.
    mask_np = np.asarray(mask)
    nonbackground_indices_x = np.any(mask_np != 2, axis=0)
    nonbackground_indices_y = np.any(mask_np != 2, axis=1)
    nonzero_x_indices = np.where(nonbackground_indices_x)
    nonzero_y_indices = np.where(nonbackground_indices_y)

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    masks = []
    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue
            difficult_obj.append(int(difficult))

            if faces_only:
                xmin = float(obj['bndbox']['xmin'])
                xmax = float(obj['bndbox']['xmax'])
                ymin = float(obj['bndbox']['ymin'])
                ymax = float(obj['bndbox']['ymax'])
            else:
                xmin = float(np.min(nonzero_x_indices))
                xmax = float(np.max(nonzero_x_indices))
                ymin = float(np.min(nonzero_y_indices))
                ymax = float(np.max(nonzero_y_indices))

            xmins.append(xmin / width)
            ymins.append(ymin / height)
            xmaxs.append(xmax / width)
            ymaxs.append(ymax / height)

            # The class comes from the file name, not from the object entry.
            class_name = get_class_name_from_filename(data['filename'])
            classes_text.append(class_name.encode('utf8'))
            classes.append(label_map_dict[class_name])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))
            if not faces_only:
                # Binary mask: 1 for every non-background pixel.
                mask_remapped = (mask_np != 2).astype(np.uint8)
                masks.append(mask_remapped)
                print('Mask append!!!')

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }
    if not faces_only:
        if mask_type == 'numerical':
            if masks:
                mask_stack = np.stack(masks).astype(np.float32)
                masks_flattened = np.reshape(mask_stack, [-1]).tolist()
            else:
                # np.stack raises ValueError on an empty list. An image with
                # no (kept) objects gets an empty mask feature instead, which
                # matches what the 'png' branch writes in that case.
                masks_flattened = []
            feature_dict['image/object/mask'] = (
                dataset_util.float_list_feature(masks_flattened))
        elif mask_type == 'png':
            encoded_mask_png_list = []
            print('len(masks)', len(masks))
            for mask_array in masks:
                img = PIL.Image.fromarray(mask_array)
                output = io.BytesIO()
                img.save(output, format='PNG')
                encoded_mask_png_list.append(output.getvalue())
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return example
def _as_bytes(value):
    """Return `value` UTF-8 encoded unless it already is a bytes object."""
    return value if isinstance(value, bytes) else value.encode('utf8')


def tf_example_from_annotations_data_frame(annotations_data_frame, label_maps,
                                           encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

    Rows are kept only when LabelName is in the label map and country is in
    the country map. Kept rows with a YMin value are written as boxes; kept
    rows without one are treated as image-level labels.

    Args:
      annotations_data_frame: Data frame containing the annotations for a
        single image. Must be non-empty: the image id is read from its first
        row.
      label_maps: list of label maps (first = labels, second = countries)
      encoded_image: The encoded image string

    Returns:
      The populated TF Example. An example is always returned; when no row
      passes the filters its per-object features are empty.
    """
    label_map, country_map = label_maps
    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]
    filtered_data_frame = filtered_data_frame[
        filtered_data_frame.country.isin(country_map)]
    filtered_data_frame_boxes = filtered_data_frame[
        ~filtered_data_frame.YMin.isnull()]
    filtered_data_frame_labels = filtered_data_frame[
        filtered_data_frame.YMin.isnull()]
    image_id = annotations_data_frame.ImageID.iloc[0]

    fields = standard_fields.TfExampleFields
    boxes = filtered_data_frame_boxes

    # `.values` replaces `.as_matrix()`, which was removed in pandas 1.0.
    # Text columns are encoded because bytes features reject unicode strings.
    feature_map = {
        fields.object_bbox_ymin:
            dataset_util.float_list_feature(boxes.YMin.values),
        fields.object_bbox_xmin:
            dataset_util.float_list_feature(boxes.XMin.values),
        fields.object_bbox_ymax:
            dataset_util.float_list_feature(boxes.YMax.values),
        fields.object_bbox_xmax:
            dataset_util.float_list_feature(boxes.XMax.values),
        fields.object_class_text:
            dataset_util.bytes_list_feature(
                [_as_bytes(name) for name in boxes.LabelName.values]),
        fields.object_class_label:
            dataset_util.int64_list_feature(
                boxes.LabelName.map(lambda x: label_map[x]).values),
        fields.country_class_text:
            dataset_util.bytes_list_feature(
                [_as_bytes(name) for name in boxes.country.values]),
        fields.country_class_label:
            dataset_util.int64_list_feature(
                boxes.country.map(lambda x: country_map[x]).values),
        fields.filename:
            dataset_util.bytes_feature(
                _as_bytes('{}.jpg'.format(image_id))),
        fields.source_id:
            dataset_util.bytes_feature(_as_bytes(image_id)),
        fields.image_encoded:
            dataset_util.bytes_feature(encoded_image),
    }

    # Optional per-box boolean columns, written as 0/1 integers when present.
    optional_flags = (
        ('IsGroupOf', 'object_group_of'),
        ('IsOccluded', 'object_occluded'),
        ('IsTruncated', 'object_truncated'),
        ('IsDepiction', 'object_depiction'),
    )
    for column, field_name in optional_flags:
        if column in filtered_data_frame.columns:
            feature_map[getattr(fields, field_name)] = (
                dataset_util.int64_list_feature(
                    boxes[column].values.astype(int)))

    if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
        labels = filtered_data_frame_labels
        feature_map[fields.image_class_label] = (
            dataset_util.int64_list_feature(
                labels.LabelName.map(lambda x: label_map[x]).values))
        # No trailing comma here: it would store a 1-tuple instead of a
        # Feature and make tf.train.Features reject the whole map.
        feature_map[fields.image_class_text] = (
            dataset_util.bytes_list_feature(
                [_as_bytes(name) for name in labels.LabelName.values]))

    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def create_tf_example(file_name,
                      masks,
                      class_names,
                      label_map_dict,
                      image_directory,
                      image_size=(768, 768)):
    """Build a tf.Example with one PNG mask and one box per encoded mask.

    Args:
      file_name: Image file name inside image_directory; also stored as
        filename and source id.
      masks: Iterable of masks in the form accepted by
        convert_mask_rle_to_img_array (one per object).
      class_names: Class name strings, looked up in label_map_dict.
      label_map_dict: A map from class name to integer id.
      image_directory: Directory that contains the image.
      image_size: (height, width) written to the example and used to scale
        the boxes; the dimensions are not read from the image itself.

    Returns:
      The populated tf.train.Example.

    Raises:
      ValueError: If the image is not JPEG encoded.
    """
    img_height, img_width = image_size[0], image_size[1]

    # Read image
    image_path = os.path.join(image_directory, file_name)
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        jpg_bytes = image_file.read()
    pil_image = PIL.Image.open(io.BytesIO(jpg_bytes))
    if pil_image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    digest = hashlib.sha256(jpg_bytes).hexdigest()

    # Look up class ids, then encode the class names into bytes
    label_ids = [label_map_dict[name] for name in class_names]
    label_names = [name.encode('utf8') for name in class_names]

    # Encode each mask as PNG and derive its bounding box
    png_masks = []
    box_xmin, box_xmax = [], []
    box_ymin, box_ymax = [], []
    for rle_mask in masks:
        mask_pixels = convert_mask_rle_to_img_array(rle_mask)
        png_masks.append(convert_img_array_to_png_str(mask_pixels))

        left, right, top, bottom = get_bbox_coordinates(mask_pixels)
        box_xmin.append(left / img_width)
        box_xmax.append(right / img_width)
        box_ymin.append(top / img_height)
        box_ymax.append(bottom / img_height)

    name_bytes = file_name.encode('utf8')
    features = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(box_xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(box_xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(box_ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(box_ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
        'image/object/mask': dataset_util.bytes_list_feature(png_masks),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))