def dict_to_tf_example(data,
                       dataset_directory,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages',
                       label_map_dict=None):
    """Convert one parsed annotation dict into a tf.train.Example.

    Args:
        data: dict with keys 'folder', 'filename', 'size' (holding 'width'
            and 'height') and, optionally, 'object': a list of dicts with
            keys 'difficult', 'truncated', 'pose', 'name' and 'bndbox'
            (holding 'xmin', 'ymin', 'xmax', 'ymax').
        dataset_directory: root directory the image path is joined onto.
        ignore_difficult_instances: when True, objects whose 'difficult'
            flag is non-zero are skipped.
        image_subdirectory: directory placed between data['folder'] and
            data['filename'].
        label_map_dict: optional mapping from object name to integer class
            id. When omitted, 'image/object/class/label' is left empty,
            which is what this function always produced before.

    Returns:
        The populated tf.train.Example.

    Raises:
        ValueError: if the image file is not a JPEG.
    """
    img_path = os.path.join(data['folder'], image_subdirectory,
                            data['filename'])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    # An annotation without any object has no 'object' key at all.
    objects = data['object'] if 'object' in data else []
    for obj in objects:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))
        box = obj['bndbox']
        xmin.append(float(box['xmin']) / width)
        ymin.append(float(box['ymin']) / height)
        xmax.append(float(box['xmax']) / width)
        ymax.append(float(box['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        if label_map_dict is not None:
            # Keeps the label list aligned with the box lists.
            classes.append(label_map_dict[obj['name']])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    filename = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
def _image_to_tfexample(image_name, annotation_name):
    """Generate a tf example from an image file and its XML annotation.

    Args:
        image_name: path of the image file; its bytes are stored as-is.
        annotation_name: path of the XML annotation. The root must contain
            a 'size' element (with 'height', 'width', 'depth') and zero or
            more 'object' elements (with 'name', 'bndbox' and optional
            'difficult' / 'truncated').

    Returns:
        The populated tf.train.Example.
    """
    # Context manager so the file handle is released deterministically.
    with tf.gfile.FastGFile(image_name, 'rb') as image_file:
        image_data = image_file.read()

    root = ElementTree.parse(annotation_name).getroot()

    # Image shape.
    size = root.find('size')
    height = int(size.find('height').text)
    width = int(size.find('width').text)
    channels = int(size.find('depth').text)

    def _optional_flag(parent, tag):
        """Return the integer text of child `tag`, or 0 when it is absent."""
        node = parent.find(tag)
        # Test against None explicitly: an Element with no children is
        # falsy, so a bare `if node:` would treat every flag as missing.
        if node is None or not node.text:
            return 0
        return int(node.text)

    # Image annotations.
    xmin = []
    xmax = []
    ymin = []
    ymax = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        label_name = obj.find('name').text
        labels.append(int(VOC_LABELS[label_name][0]))
        labels_text.append(label_name.encode('ascii'))
        difficult.append(_optional_flag(obj, 'difficult'))
        truncated.append(_optional_flag(obj, 'truncated'))

        bbox = obj.find('bndbox')
        xmin.append(float(bbox.find('xmin').text) / width)
        xmax.append(float(bbox.find('xmax').text) / width)
        ymin.append(float(bbox.find('ymin').text) / height)
        ymax.append(float(bbox.find('ymax').text) / height)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded': bytes_feature(image_data),
            'image/format': bytes_feature(b'JPEG'),
            'image/height': int64_feature(height),
            'image/width': int64_feature(width),
            'image/channels': int64_feature(channels),
            'image/object/bbox/xmin': float_list_feature(xmin),
            'image/object/bbox/xmax': float_list_feature(xmax),
            'image/object/bbox/ymin': float_list_feature(ymin),
            'image/object/bbox/ymax': float_list_feature(ymax),
            'image/object/bbox/label': int64_list_feature(labels),
            'image/object/bbox/text': bytes_list_feature(labels_text),
            'image/object/bbox/difficult': int64_list_feature(difficult),
            'image/object/bbox/truncated': int64_list_feature(truncated),
        }))
    return example
def create_tf_example(group, path):
    """Create a tf.Example proto for one image and its annotated boxes.

    Args:
        group: record exposing `filename` and `object`; `object` is
            iterated with `iterrows()` (presumably a pandas DataFrame --
            confirm against the caller) and each row supplies 'xmin',
            'xmax', 'ymin', 'ymax' and 'class_num'.
        path: directory that `group.filename` is joined onto.

    Returns:
        The created tf.train.Example.
    """
    image_file = op.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # Only the pixel dimensions are needed from the decoded image.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    left, right, top, bottom = [], [], [], []
    class_names, class_ids = [], []
    for _, box in group.object.iterrows():
        left.append(box['xmin'] / width)
        right.append(box['xmax'] / width)
        top.append(box['ymin'] / height)
        bottom.append(box['ymax'] / height)
        # `tags` maps a class number to its display name.
        class_names.append(tags[box['class_num']].encode('utf8'))
        class_ids.append(box['class_num'])

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example_other(self, example, filename):
    """Build a tf.train.Example for an image of the "OTHER" label set.

    Args:
        example: mapping with an 'annotations' list; each entry supplies
            'xmin', 'ymin', 'x_width', 'y_height' (pixel units) and
            'class' (a key of DICT_LABEL_OTHER).
        filename: path of the image file; also stored, encoded, as the
            example's filename and source id.

    Returns:
        The populated tf.train.Example. The class-text feature is
        intentionally not written; only integer labels are stored.
    """
    image_path = filename
    filename = filename.encode()

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()
    # Only the size is needed; close the image instead of decoding every
    # pixel into an unused buffer.
    with Image.open(image_path) as image:
        width, height = image.size

    image_format = 'jpg'.encode()

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes = []

    for box in example['annotations']:
        box_x = box['xmin']
        box_y = box['ymin']
        box_width = box['x_width']
        box_height = box['y_height']
        # Convert before dividing so integer inputs never truncate.
        xmins.append(float(box_x) / width)
        xmaxs.append(float(box_x + box_width) / width)
        ymins.append(float(box_y) / height)
        ymaxs.append(float(box_y + box_height) / height)
        print("[", box['class'].encode('utf-8'), "]")
        classes.append(int(DICT_LABEL_OTHER[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_image),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(group, path, dictionary):
    """Build a tf.train.Example for one image and its labelled boxes.

    Args:
        group: record exposing `filename` (without the '.jpg' suffix,
            which is appended here) and `object`, iterated with
            `iterrows()`; each row supplies 'xmin', 'xmax', 'ymin', 'ymax'
            and 'class'.
        path: directory containing the image.
        dictionary: forwarded to `class_text_to_int` together with the
            class name to obtain the integer label.

    Returns:
        The populated tf.train.Example.
    """
    image_file = os.path.join(path, '{}'.format(group.filename)) + '.jpg'
    with tf.io.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # The image is decoded only to learn its pixel dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    columns = {
        'xmin': [], 'xmax': [], 'ymin': [], 'ymax': [],
        'text': [], 'label': [],
    }
    for _, box in group.object.iterrows():
        columns['xmin'].append(box['xmin'] / width)
        columns['xmax'].append(box['xmax'] / width)
        columns['ymin'].append(box['ymin'] / height)
        columns['ymax'].append(box['ymax'] / height)
        columns['text'].append(box['class'].encode('utf8'))
        columns['label'].append(class_text_to_int(box['class'], dictionary))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(columns['xmin']),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(columns['xmax']),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(columns['ymin']),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(columns['ymax']),
        'image/object/class/text':
            dataset_util.bytes_list_feature(columns['text']),
        'image/object/class/label':
            dataset_util.int64_list_feature(columns['label']),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(frame, label_map_dict):
    """Build a tf.train.Example for one annotated video frame.

    Args:
        frame: mapping with 'height', 'width', 'frame_id', 'bboxes' (each
            box indexed as [xmin, ymin, xmax, ymax] in pixels) and 'names'
            (one class name per box).
        label_map_dict: mapping from class name to integer class id.

    Returns:
        The populated tf.train.Example. The image is read from
        FLAGS.image_dir as '<frame_id>.jpg'.
    """
    height = frame['height']
    width = frame['width']

    image_name = '{}.jpg'.format(frame['frame_id'])
    img_path = os.path.join(FLAGS.image_dir, image_name)
    filename = image_name.encode()

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_image_data = fid.read()
    image_format = b'jpeg'

    def _normalized(index, extent):
        """Return coordinate `index` of every box divided by `extent`."""
        return [float(bbox[index]) / extent for bbox in frame['bboxes']]

    xmins = _normalized(0, width)
    xmaxs = _normalized(2, width)
    ymins = _normalized(1, height)
    ymaxs = _normalized(3, height)

    classes_text = [name.encode() for name in frame['names']]
    classes = [label_map_dict[name] for name in frame['names']]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(bboxes, img_info, category_name2id, class_mapper=None):
    """Build a tf.train.Example from an image record and its boxes.

    Args:
        bboxes: iterable of mappings with 'x1', 'y1', 'width', 'height'
            (pixel units) and 'label'.
        img_info: mapping with 'height', 'width', 'path' (image file) and
            'format' (stored as the image format).
        category_name2id: mapping from (possibly remapped) class name to
            integer class id.
        class_mapper: optional mapping used to rename labels before the id
            lookup; labels missing from it are kept unchanged. Boxes whose
            resulting name is "__background__" are dropped.

    Returns:
        The populated tf.train.Example.
    """
    # A None default avoids sharing one dict object between calls.
    if class_mapper is None:
        class_mapper = {}

    def _as_bytes(value):
        """Return `value` as bytes; bytes features reject text strings."""
        if isinstance(value, bytes):
            return value
        return str(value).encode('utf8')

    height = img_info['height']
    width = img_info['width']
    filename = img_info['path']
    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_jpg = fid.read()
    image_format = _as_bytes(img_info['format'])

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for bbox in bboxes:
        xmin = float(bbox['x1']) / width
        xmax = float(bbox['x1'] + bbox['width']) / width
        ymin = float(bbox['y1']) / height
        ymax = float(bbox['y1'] + bbox['height']) / height

        class_text = class_mapper.get(bbox['label'], bbox['label'])
        if class_text == "__background__":
            continue
        class_id = category_name2id[class_text]

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)
        classes_text.append(_as_bytes(class_text))
        classes.append(class_id)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(_as_bytes(os.path.basename(filename))),
        'image/source_id': dataset_util.bytes_feature(_as_bytes(filename)),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def create_tf_example(group, path):
    """Build a tf.train.Example for one image and its labelled boxes.

    Args:
        group: record exposing `filename` and `object`; `object` is
            iterated with `iterrows()` and each row supplies 'xmin',
            'xmax', 'ymin', 'ymax' and 'class'.
        path: directory that `group.filename` is joined onto.

    Returns:
        The populated tf.train.Example. Each row and the final list of
        class ids are printed as a side effect.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode only to obtain the pixel dimensions used for normalisation.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    left, right, top, bottom = [], [], [], []
    class_names, class_ids = [], []
    for _, box in group.object.iterrows():
        print('row', box)
        left.append(box['xmin'] / width)
        right.append(box['xmax'] / width)
        top.append(box['ymin'] / height)
        bottom.append(box['ymax'] / height)
        class_names.append(box['class'].encode('utf8'))
        class_ids.append(class_text_to_int(box['class']))
    print('classes', class_ids)

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(ex_name, ex_xmins, ex_xmaxs, ex_ymins, ex_ymaxs):
    """Build a single-box 'Shoes' tf.train.Example for one image.

    Args:
        ex_name: image file name, resolved relative to the module-level
            PATH directory.
        ex_xmins: left x coordinate of the box, stored unchanged
            (presumably already normalised -- confirm against the caller).
        ex_xmaxs: right x coordinate of the box, stored unchanged.
        ex_ymins: top y coordinate of the box, stored unchanged.
        ex_ymaxs: bottom y coordinate of the box, stored unchanged.

    Returns:
        The populated tf.train.Example with class text 'Shoes' and class
        id 91.
    """
    image_path = os.path.join(PATH, ex_name)

    # 'image/encoded' must hold the encoded file contents; raw pixel bytes
    # from Image.tobytes() cannot be decoded by an image decoder.
    with open(image_path, 'rb') as fid:
        encoded_image_data = fid.read()
    with Image.open(image_path) as image:
        width, height = image.size

    # Bytes features reject text strings, so encode the name explicitly.
    if isinstance(ex_name, bytes):
        filename = ex_name
    else:
        filename = ex_name.encode('utf8')
    image_format = b'jpg'

    xmins = [ex_xmins]
    xmaxs = [ex_xmaxs]
    ymins = [ex_ymins]
    ymaxs = [ex_ymaxs]
    classes_text = [b'Shoes']
    classes = [91]

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_image_data),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(group):
    """Build a tf.train.Example for an image fetched from object storage.

    Args:
        group: record exposing `filename` (object name inside the 'images'
            bucket) and `object`, iterated with `iterrows()`; each row
            supplies 'xmin', 'xmax', 'ymin', 'ymax' and 'class'.

    Returns:
        The populated tf.train.Example. Files whose name ends in '.png'
        are converted to RGB and re-encoded as JPEG so the stored bytes
        match the declared 'jpg' format.
    """
    encoded_img = object_storage.get_object(
        namespace, 'images', group.filename).data.content
    image = Image.open(io.BytesIO(encoded_img))
    if group.filename.endswith('.png'):
        # Previously the RGB conversion was discarded and the original PNG
        # bytes were written under a 'jpg' format tag.
        image = image.convert('RGB')
        jpeg_buffer = io.BytesIO()
        image.save(jpeg_buffer, format='JPEG')
        encoded_img = jpeg_buffer.getvalue()
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for _, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(row_labels[row['class']])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_img),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(image_path, example, image_id):
    """Create a tf_example for all 'fish' boxes of one image.

    Args:
        image_path: directory containing '<image_id>.jpg'.
        example: table with the columns "XMin", "XMax", "YMin", "YMax"
            (the box columns of "fish.csv") holding every bounding box of
            this image. Values are stored as given, without dividing by
            the image size.
        image_id: image identifier; '.jpg' is appended to form the name.

    Returns:
        The populated tf.train.Example; every box gets class text b'fish'
        and class id 1.
    """
    image_name = image_id + '.jpg'
    full_path = os.path.join(image_path, image_name)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_image_data = fid.read()
    filename = image_name.encode()

    # The file is opened a second time purely to read its dimensions.
    width, height = Image.open(full_path).size
    image_format = b'jpg'

    boxes = np.array(example[['XMin', 'XMax', 'YMin', 'YMax']])
    xmins = [box[0] for box in boxes]
    xmaxs = [box[1] for box in boxes]
    ymins = [box[2] for box in boxes]
    ymaxs = [box[3] for box in boxes]

    box_count = len(example)
    classes_text = [b'fish'] * box_count
    classes = [1] * box_count

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(name, image_dir, annot_dir):
    """Build a tf.train.Example with one 'hand' box per annotated polygon.

    Args:
        name: base name shared by the image ('<name>.jpg') and the
            annotation ('<name>.mat').
        image_dir: directory containing the image.
        annot_dir: directory containing the annotation, which is parsed by
            `parse_coordinates` into an iterable of point sequences; each
            point unpacks as (y, x) in pixels.

    Returns:
        The populated tf.train.Example. Each box is the axis-aligned
        extent of its points, normalised and clamped to [0, 1].
    """
    image_path = os.path.join(image_dir, name + '.jpg')
    annot_path = os.path.join(annot_dir, name + '.mat')

    annot_mat = parse_coordinates(annot_path)

    # A leftover debug `print(encoded_jpg); exit(1)` used to terminate the
    # process here, so no example was ever built.
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size

    filename = name.encode('utf8')
    image_format = b'jpg'
    label = 'hand'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for coord in annot_mat:
        x_max, x_min, y_max, y_min = 0, float('inf'), 0, float('inf')
        for y, x in coord:
            x_max, x_min = max(x, x_max), min(x, x_min)
            y_max, y_min = max(y, y_max), min(y, y_min)

        # Box coordinates must lie in [0, 1]; clamp anything that falls
        # outside the image.
        xmins.append(max(float(x_min) / width, 0.0))
        ymins.append(max(float(y_min) / height, 0.0))
        xmaxs.append(min(float(x_max) / width, 1.0))
        ymaxs.append(min(float(y_max) / height, 1.0))

        classes_text.append(label.encode('utf8'))
        classes.append(class_text_to_int(label))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def create_tf_example(entry):
    """Build a single-box tf.train.Example from one annotation entry.

    Args:
        entry: mapping with 'height', 'width', 'file_name' (appended to
            the module-level BASE_DIR), 'bbox' (indexed as
            [x, y, box_width, box_height] in pixels), 'name' (class text)
            and 'category_id' (integer class id).

    Returns:
        The populated tf.train.Example.
    """
    height = entry['height']
    width = entry['width']
    file_name = entry['file_name']
    filename = file_name.encode()
    image_format = b'jpeg'

    # Use the original text for the path (the old encode/decode('ascii')
    # round trip failed on non-ASCII names) and close the handle promptly.
    with open(BASE_DIR + file_name, 'rb') as image_file:
        encoded_image_data = image_file.read()

    box_x = entry['bbox'][0]
    box_y = entry['bbox'][1]
    box_w = entry['bbox'][2]
    box_h = entry['bbox'][3]
    # Convert before dividing so integer inputs never truncate.
    xmins = [float(box_x) / width]
    xmaxs = [float(box_x + box_w) / width]
    ymins = [float(box_y) / height]
    ymaxs = [float(box_y + box_h) / height]

    classes_text = [entry['name'].encode()]
    classes = [entry['category_id']]

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_image_data),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(example):
    """Build a tf.train.Example from an image path and its annotations.

    Args:
        example: mapping with 'filename' (path of the image file) and
            'annotations', a list of boxes with 'xmin', 'ymin', 'x_width',
            'y_height' (pixel units) and 'class' (a key of LABEL_DICT).

    Returns:
        The populated tf.train.Example. Boxes whose right or bottom edge
        extends past the image are left out.
    """
    image_path = example['filename']
    filename = image_path.encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()
    # Decode only to learn the image dimensions.
    width, height = Image.open(io.BytesIO(encoded_image)).size
    image_format = 'jpg'.encode()

    left, right, top, bottom = [], [], [], []
    class_names, class_ids = [], []

    # One image may carry several boxes.
    for box in example['annotations']:
        right_edge = box['xmin'] + box['x_width']
        if right_edge > width:
            continue
        bottom_edge = box['ymin'] + box['y_height']
        if bottom_edge > height:
            continue

        left.append(float(box['xmin']) / width)
        right.append(float(right_edge) / width)
        top.append(float(box['ymin']) / height)
        bottom.append(float(bottom_edge) / height)
        class_names.append(box['class'].encode())
        class_ids.append(int(LABEL_DICT[box['class']]))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_feature(image_file_path: pathlib.PosixPath, camera_token: str, corner_list: np.ndarray, image_width: int, image_height: int, boxes: List[Box]) -> tf.train.Example:
    """Build a tf.train.Example for one camera image and its boxes.

    Args:
        image_file_path: path of the JPEG image; its POSIX string is also
            stored as 'image/filename'.
        camera_token: identifier stored as 'image/source_id'.
        corner_list: 2-D array with one row per box; columns 0..3 are read
            as xmin, xmax, ymin, ymax in pixels.
        image_width: width used to normalise the x coordinates.
        image_height: height used to normalise the y coordinates.
        boxes: boxes exposing `name` (a key of object_idx_dict) and
            `token`; may be empty.

    Returns:
        The populated tf.train.Example, including a SHA-256 key of the
        encoded image.

    Raises:
        ValueError: if the image file is not a JPEG.
    """
    # Plain per-field lists instead of a zip(*...) transpose, which yields
    # nothing to index when `boxes` is empty.
    class_id_list = [object_idx_dict[box.name] for box in boxes]
    classes_text_list = [box.name.encode('utf-8') for box in boxes]
    anns_token_list = [box.token.encode('utf-8') for box in boxes]

    corners = np.asarray(corner_list)
    if corners.size == 0:
        # Give an empty input the expected 2-D shape so column slicing
        # below still works.
        corners = corners.reshape(0, 4)

    with tf.gfile.GFile(image_file_path.as_posix(), 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # NOTE: despite being stored as the filename, this is the full path.
    file_path_text = image_file_path.as_posix()

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(
            file_path_text.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            camera_token.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(
            corners[:, 0] / float(image_width)),
        'image/object/bbox/xmax': dataset_util.float_list_feature(
            corners[:, 1] / float(image_width)),
        'image/object/bbox/ymin': dataset_util.float_list_feature(
            corners[:, 2] / float(image_height)),
        'image/object/bbox/ymax': dataset_util.float_list_feature(
            corners[:, 3] / float(image_height)),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text_list),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_id_list),
        'image/object/class/anns_id':
            dataset_util.bytes_list_feature(anns_token_list),
    }

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return example
def create_tf_example(row):
    """Build a single-box tf.train.Example from one annotation row.

    Args:
        row: mapping with 'filename' (image inside '<cwd>/images'),
            'xmin', 'xmax', 'ymin', 'ymax' (pixel units) and 'class'.

    Returns:
        The populated tf.train.Example.
    """
    image_file = os.path.join(
        os.getcwd(), 'images', '{}'.format(row['filename']))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode only to obtain the dimensions used for normalisation.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = row['filename'].encode('utf8')
    image_format = b'jpg'

    left = [row['xmin'] / width]
    right = [row['xmax'] / width]
    top = [row['ymin'] / height]
    bottom = [row['ymax'] / height]
    class_names = [row['class'].encode('utf8')]
    class_ids = [class_text_to_int(row['class'])]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(encoded_image_data, filename, x_min, x_max, y_min, y_max, classes_text, classes):
    """Create a single-box tf.Example proto from in-memory image data.

    Args:
        encoded_image_data: object exposing `tobytes()`; the returned
            bytes are stored under 'image/encoded'.
        filename: text name stored as both filename and source id.
        x_min: left box edge in pixels.
        x_max: right box edge in pixels.
        y_min: top box edge in pixels.
        y_max: bottom box edge in pixels.
        classes_text: list of class names, passed straight to the bytes
            list feature.
        classes: list of integer class ids.

    Returns:
        example: The created tf.Example.

    NOTE(review): `width` and `height` are not parameters; they are
    resolved from an enclosing scope (presumably module-level globals --
    confirm they are set before this is called).
    """
    image_format = b'jpg'

    # Normalise the single box by the externally supplied image size.
    left = [x_min / width]
    right = [x_max / width]
    top = [y_min / height]
    bottom = [y_max / height]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode()),
        'image/source_id': dataset_util.bytes_feature(filename.encode()),
        'image/encoded':
            dataset_util.bytes_feature(encoded_image_data.tobytes()),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(df, img_id):
    """Build a tf.train.Example holding every 'Car' box of one image.

    Args:
        df: table with the columns 'XMin', 'XMax', 'YMin', 'YMax' (pixel
            units) holding the boxes of this image.
        img_id: integer image id; the file read is
            'data/images/<img_id zero-padded to 6 digits>.png'.

    Returns:
        The populated tf.train.Example; every box gets class text b'Car'
        and class id 1.
    """
    image_name = '%06d.png' % img_id
    full_path = os.path.join('data/images/', image_name)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_image_data = fid.read()
    filename = image_name.encode()

    # The file is opened a second time purely to read its dimensions.
    width, height = Image.open(full_path).size
    image_format = b'png'

    boxes = np.array(df[['XMin', 'XMax', 'YMin', 'YMax']])
    xmins = [box[0] / width for box in boxes]
    xmaxs = [box[1] / width for box in boxes]
    ymins = [box[2] / height for box in boxes]
    ymaxs = [box[3] / height for box in boxes]

    box_count = len(df)
    classes_text = [b'Car'] * box_count
    classes = [1] * box_count

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(f, inputpath):
    """Build a tf.Example for image file ``f`` located in ``inputpath``.

    The example carries a single placeholder box (all coordinates 0) labelled
    'Human' with class id 1.

    Args:
      f: Image file name relative to ``inputpath``.
      inputpath: Directory containing the image.

    Returns:
      The populated tf.train.Example.
    """
    # Use one joined path for both reads; the original opened the image via
    # ``inputpath + f``, which only works when inputpath ends with a separator.
    image_path = os.path.join(inputpath, f)

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image_data = fid.read()

    # Real dimensions come from the image itself; close the handle afterwards.
    with Image.open(image_path) as image:
        width, height = image.size

    # Stored name is the part of ``f`` before its first dot.
    filename = f.split('.')[0].encode('utf8')
    image_format = b'jpg'

    # Placeholder annotation: one degenerate box at the origin.
    xmins = [0]
    xmaxs = [0]
    ymins = [0]
    ymaxs = [0]
    classes_text = ['Human'.encode('utf8')]
    classes = [1]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def create_tf_example(example, path_root, LABEL_DICT):
    """Build a single-box tf.Example, randomly cropped unless in test mode.

    Unless ``FLAGS.evaluation_status`` is "test", the image is cropped to the
    bounding box enlarged on each side by a random 5%-20% of the box extent
    (clamped to the image). The (possibly cropped) image is re-encoded as JPEG.

    Args:
      example: Mapping with 'image_name', pixel box corners 'x_1', 'y_1',
        'x_2', 'y_2', and the category fields selected by ``FLAGS.categories``
        ('category_type', or 'category_name' and 'category_label').
      path_root: Prefix concatenated with ``example['image_name']`` to locate
        the image.
      LABEL_DICT: Maps ``example['category_type']`` to a class name; used when
        ``FLAGS.categories == 'broad'``.

    Returns:
      The tf.train.Example, or None when the resulting image is smaller than
      50 px on a side, the box is degenerate (zero height), or its
      width/height ratio lies outside [0.2, 5].

    Raises:
      AssertionError: if the normalised box falls outside the image.
      ValueError: if ``FLAGS.categories`` is neither 'broad' nor 'fine'.
    """
    f_image = Image.open(path_root + example["image_name"])
    _width, _height = f_image.size
    width, height = _width, _height

    if FLAGS.evaluation_status != "test":
        box_w = example['x_2'] - example['x_1']
        box_h = example['y_2'] - example['y_1']
        # Random margin of 5%-20% of the box size on every side.
        left = example['x_1'] - round((random.random() * 0.15 + 0.05) * box_w)
        top = example['y_1'] - round((random.random() * 0.15 + 0.05) * box_h)
        right = example['x_2'] + round((random.random() * 0.15 + 0.05) * box_w)
        bottom = example['y_2'] + round((random.random() * 0.15 + 0.05) * box_h)
        # Clamp the crop window to the image.
        left = max(left, 0)
        top = max(top, 0)
        if right >= width:
            right = width
        if bottom >= height:
            bottom = height
        f_image = f_image.crop((left, top, right, bottom))
        width, height = f_image.size
    else:
        # No crop in test mode. Binding the window to the full image keeps the
        # assertion message below well-defined (these names were unbound
        # before, turning a failed assertion into a NameError).
        left, top, right, bottom = 0, 0, width, height

    # Re-encode the (possibly cropped) image as JPEG bytes.
    buffer = io.BytesIO()
    f_image.save(buffer, format='jpeg')
    encoded_image_data = buffer.getvalue()

    filename = example["image_name"].encode()
    image_format = 'jpeg'.encode()

    # Box coordinates relative to the crop window, normalised to [0, 1].
    xmins = [(example['x_1'] - left) / width]
    xmaxs = [(example['x_2'] - left) / width]
    ymins = [(example['y_1'] - top) / height]
    ymaxs = [(example['y_2'] - top) / height]

    assert (xmins[0] >= 0.) and (xmaxs[0] < 1.01) and (ymins[0] >= 0.) and (ymaxs[0] < 1.01), \
        (example, _width, _height, width, height, left, right, top, bottom,
         xmins, xmaxs, ymins, ymaxs)

    # Reject tiny images and boxes with an extreme aspect ratio. A zero-height
    # box is rejected explicitly instead of raising ZeroDivisionError.
    if width < 50 or height < 50:
        return None
    norm_box_h = ymaxs[0] - ymins[0]
    if norm_box_h == 0:
        return None
    aspect = (xmaxs[0] - xmins[0]) / norm_box_h
    if aspect < 0.2 or aspect > 5.:
        return None

    if FLAGS.categories == 'broad':
        classes_text = [LABEL_DICT[example['category_type']].encode()]
        classes = [example['category_type']]
    elif FLAGS.categories == 'fine':
        classes_text = [example['category_name'].encode()]
        classes = [example['category_label']]
    else:
        raise (ValueError(
            "Incorrect value for flag categories. Must be 'broad' or 'fine'."))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_image_data),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(example):
    """Build a tf.Example from one annotated image of the Udacity sim data set.

    Image dimensions are fixed at 800x600; they are not read from the file.

    Args:
      example: Mapping with 'filename' (path of the image, also stored as the
        example's filename and source id) and 'annotations', a list of boxes
        with 'xmin', 'ymin', 'x_width', 'y_height' in pixels and a 'class'
        name that is a key of the module-level LABEL_DICT.

    Returns:
      The populated tf.train.Example.
    """
    # Udacity sim data set
    height = 600
    width = 800

    filename = example['filename'].encode()
    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()
    image_format = 'jpg'.encode()

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for box in example['annotations']:
        # Convert to float BEFORE dividing so integer pixel values are never
        # subject to integer division.
        x_lo = float(box['xmin'])
        y_lo = float(box['ymin'])
        xmins.append(x_lo / width)
        xmaxs.append((x_lo + float(box['x_width'])) / width)
        ymins.append(y_lo / height)
        ymaxs.append((y_lo + float(box['y_height'])) / height)
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_image),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def sub_img_to_tf_example(img_name, image, instanceImg):
    """Build a tf.Example with boxes and PNG masks from an instance image.

    Every non-zero value ``v`` in ``instanceImg`` is treated as one instance
    whose class id is ``v // 1000``. Instances whose mask is not a single
    connected region, or whose bounding-box area fraction is below
    ``FLAGS.min_area``, are skipped.

    Args:
      img_name: Name stored as filename and source id; also used in log lines.
      image: PIL image; it is re-encoded as JPEG for the example.
      instanceImg: Instance-id image convertible with ``np.asarray``; it must
        contain the value 0.

    Returns:
      The tf.train.Example, or None if no instance survived the filters.
    """
    encoded_image = io.BytesIO()
    image.save(encoded_image, format='JPEG')
    key = hashlib.sha256(encoded_image.getvalue()).hexdigest()

    iimg_np = np.asarray(instanceImg).copy()
    iimg_vals = np.unique(iimg_np)
    # np.unique returns sorted values, so a present background 0 comes first.
    assert (len(iimg_vals) > 0 and iimg_vals[0] == 0)
    instances = iimg_vals[1:]

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    masks = []

    for (i, j) in enumerate(instances):
        # Reset per instance so the error handler never reports values left
        # over from a previous iteration (or hits an unbound name).
        x_mask = y_mask = None
        try:
            # images are encoded (id * 1000) + instance
            inst_id = j % 1000
            inst_class = int((j - inst_id) / 1000)

            mask_bin = (iimg_np == j)
            mask = mask_bin.astype(np.uint8) * 2  # now mask is 0 or 2
            mask_first_pixel = tuple(
                np.column_stack(np.where(mask == 2))[0][::-1])
            # In almost all cases this fills the single connected mask with 1s.
            cv2.floodFill(mask, None, mask_first_pixel, 1,
                          flags=8 | cv2.FLOODFILL_FIXED_RANGE)

            # Any pixel still at 2 belongs to an additional, unconnected part
            # of the mask -- most probably a bad annotation, so skip it.
            # (np.all replaces np.alltrue, which newer NumPy no longer has.)
            if not np.all(mask <= 1):
                tf.logging.log(
                    tf.logging.WARN, '%02i/%02i (%s) has a split mask' %
                    (i, inst_class, img_name))
                if FLAGS.vmasks:
                    cv2.imshow('image', mask * 255)
                    keyb = cv2.waitKey(0)
                    if keyb == 27:  # ESC aborts the whole run
                        sys.exit()
                    cv2.destroyAllWindows()
                continue

            # encode the mask as png
            output = io.BytesIO()
            mask_png = Image.fromarray(mask)
            mask_png.save(output, format='PNG')

            # calculate a box around the mask
            x_mask = np.where(np.any(mask_bin, axis=0))
            y_mask = np.where(np.any(mask_bin, axis=1))
            xmin = np.min(x_mask)
            xmax = np.max(x_mask)
            ymin = np.min(y_mask)
            ymax = np.max(y_mask)

            x_fraction = float(xmax - xmin) / image.width
            y_fraction = float(ymax - ymin) / image.height
            area = x_fraction * y_fraction

            if area < FLAGS.min_area:
                # A zero area is logged as an error, a small one as a warning.
                level = tf.logging.WARN if area > 0 else tf.logging.ERROR
                tf.logging.log(
                    level,
                    '%02i/%02i (%s) has area < treshold => %02.7f < %02.7f'
                    % (i, inst_class, img_name, area, FLAGS.min_area))
                continue

            masks.append(output.getvalue())
            # float() replaces astype(np.float); the np.float alias is gone
            # from current NumPy releases.
            xmins.append(float(xmin) / image.width)
            xmaxs.append(float(xmax) / image.width)
            ymins.append(float(ymin) / image.height)
            ymaxs.append(float(ymax) / image.height)

            classes.append(inst_class)
            classes_text.append(class_mappings[inst_class].encode('utf8'))

            tf.logging.log(
                tf.logging.DEBUG, '%02i: (%04i,%04i), (%04i,%04i)' %
                (i, xmin, ymin, xmax, ymax))
        except ValueError:
            # ``image.height`` was misspelled ``heigth`` here, so this handler
            # itself used to raise AttributeError.
            tf.logging.warn(
                "%s (%ix%i): %02i instances/#%02i having invalid mask (not an instance):\nx-vals: %s \ny-vals: %s\n"
                % (img_name, image.width, image.height, len(instances) - 1, i,
                   x_mask, y_mask))
            continue

    # this image has no considerable boxes at all
    if len(classes) == 0:
        return None

    if FLAGS.debug:
        tf.logging.debug("%s: %02i instances used" % (img_name, len(masks)))

    feature_dict = {
        'image/width': dataset_util.int64_feature(image.width),
        'image/height': dataset_util.int64_feature(image.height),
        'image/filename': dataset_util.bytes_feature(img_name.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(img_name.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image.getvalue()),
        'image/format': dataset_util.bytes_feature('jpg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/mask': dataset_util.bytes_list_feature(masks),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def parse_example(f, images_path):
    """Read the next image record from an annotation file into a tf.Example.

    A record is: one line with the image path (relative to ``images_path``),
    one line with the number of faces, then one whitespace-separated
    annotation line per face whose first four fields are x, y, width, height
    in pixels. Only faces wider than 25 px and taller than 30 px are kept.

    Args:
      f: Open text file positioned at the start of a record.
      images_path: Directory the image paths are relative to.

    Returns:
      The tf.train.Example, or None if no face passed the size filter.

    Raises:
      FileNameIsNone: if the file-name line is empty.
      FaceNumIsNone: if the face count is 0.
      IOError: if the image is missing, unreadable, or cannot be re-encoded.
      Exception: if an annotation line is empty.
    """
    filename = f.readline().rstrip()
    if not filename:
        raise FileNameIsNone()

    filepath = os.path.join(images_path, filename)
    if not os.path.isfile(filepath):
        raise IOError()

    face_num = int(f.readline().rstrip())
    if not face_num:
        raise FaceNumIsNone()

    raw_all_annot = []
    for _ in range(face_num):
        annot = f.readline().rstrip().split()
        if not annot:
            raise Exception()
        raw_all_annot.append(annot)

    image_raw = cv2.imread(filepath)
    if image_raw is None:
        raise IOError()
    original_height, original_width = image_raw.shape[:2]

    if config.RESIZE:
        image_raw = cv2.resize(image_raw, (config.RESIZE, config.RESIZE))

    is_success, buffer = cv2.imencode(".jpg", image_raw)
    # The success flag used to be ignored, which deferred the failure to an
    # unrelated error on ``buffer``.
    if not is_success:
        raise IOError()
    encoded_image_data = buffer.tobytes()
    key = ''  # hashing is disabled; an empty key is stored

    height, width = image_raw.shape[:2]
    scale_w = float(width) / original_width
    scale_h = float(height) / original_height

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    poses = []
    truncated = []
    difficult_obj = []

    for annot in raw_all_annot:
        box_x, box_y, box_w, box_h = (float(v) for v in annot[:4])
        if not (box_w > 25.0 and box_h > 30.0):
            continue
        # WIDER FACE contains annotations that exceed the image boundary, so
        # normalised coordinates are clamped to [0.005, 0.995].
        xmins.append(max(0.005, (box_x * scale_w) / width))
        ymins.append(max(0.005, (box_y * scale_h) / height))
        xmaxs.append(min(0.995, ((box_x + box_w) * scale_w) / width))
        ymaxs.append(min(0.995, ((box_y + box_h) * scale_h) / height))
        classes_text.append(b'face')
        classes.append(1)
        poses.append("front".encode('utf8'))
        truncated.append(0)
        difficult_obj.append(0)

    if len(classes) == 0:
        return None

    bbox_encoded = ','.join(
        str(v) for v in xmins + xmaxs + ymins + ymaxs).encode('utf-8')

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(int(height)),
            'image/width': dataset_util.int64_feature(int(width)),
            'image/filename':
                dataset_util.bytes_feature(filename.encode('utf-8')),
            'image/source_id':
                dataset_util.bytes_feature(filename.encode('utf-8')),
            'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded': dataset_util.bytes_feature(encoded_image_data),
            'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/encoded':
                dataset_util.bytes_feature(bbox_encoded),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            # One flag per box; the original passed the scalar ``int(0)`` to a
            # helper that takes a list.
            'image/object/difficult':
                dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
                dataset_util.int64_list_feature(truncated),
            'image/object/view': dataset_util.bytes_list_feature(poses),
        }))
    return tf_example
def create_tf_example(name, img_dir, ann_dir):
    """Build a tf.Example for one image with 'hand' boxes from a .mat file.

    Each entry returned by ``coords_from_mat`` is a sequence of (y, x) points;
    its axis-aligned bounding box becomes one 'hand' box, clamped to [0, 1]
    after normalisation.

    Args:
      name: Base name shared by ``<name>.jpg`` and ``<name>.mat``.
      img_dir: Directory holding the JPEG image.
      ann_dir: Directory holding the annotation file.

    Returns:
      The populated tf.train.Example.

    Raises:
      ValueError: if the image is not a JPEG.
    """
    img_filename = '%s.jpg' % name
    ann_filename = '%s.mat' % name
    img_full_path = os.path.join(img_dir, img_filename)
    ann_full_path = os.path.join(ann_dir, ann_filename)

    with tf.gfile.GFile(img_full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    label = 'hand'
    width, height = image.size

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    for coord in coords_from_mat(ann_full_path):
        # Running extrema over the points; maxima start at 0 and minima at
        # +inf, exactly as before.
        x_max, x_min, y_max, y_min = 0, float('inf'), 0, float('inf')
        for y, x in coord:
            x_max, x_min = max(x, x_max), min(x, x_min)
            y_max, y_min = max(y, y_max), min(y, y_min)

        xmin.append(max(float(x_min) / width, 0.0))
        ymin.append(max(float(y_min) / height, 0.0))
        xmax.append(min(float(x_max) / width, 1.0))
        ymax.append(min(float(y_max) / height, 1.0))

        classes_text.append(label.encode('utf8'))
        classes.append(label_map_dict[label])
        truncated.append(0)
        poses.append('Frontal'.encode('utf8'))
        difficult_obj.append(0)

    # Encode once. The original encoded the source id twice, which raises
    # AttributeError on Python 3 because bytes has no ``encode``.
    filename_bytes = img_filename.encode('utf8')

    return tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename_bytes),
        'image/source_id': dataset_util.bytes_feature(filename_bytes),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
def dict_to_tf_example(data_dict, dataset_directory):
    """Convert an annotation dict to a tf.Example, accepting JPEG or BMP.

    BMP images are converted through ``bmpToJpg``; the temporary JPEG it
    produces is read back and then deleted.

    Args:
      data_dict: Mapping with 'filename' and optionally 'source_id' and
        'objects'. Each object has pixel 'xmin', 'ymin', 'xmax', 'ymax',
        a 'text' class name and integer 'label', plus optional 'difficult',
        'truncated' and 'pose'.
      dataset_directory: Directory used to resolve 'filename' when it does not
        exist as given.

    Returns:
      The populated tf.train.Example.

    Raises:
      ValueError: if the image is neither JPEG nor BMP.
    """
    full_path = data_dict['filename']
    if not Path(full_path).exists():
        # for label image tools
        full_path = os.path.join(dataset_directory, full_path)

    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))

    if image.format != 'JPEG':
        if image.format != "BMP":
            raise ValueError('Image format not JPEG or BMP')
        new_jpeg_path = bmpToJpg(dataset_directory, data_dict['filename'])
        try:
            with tf.io.gfile.GFile(new_jpeg_path, 'rb') as fid:
                encoded_jpg = fid.read()
            image = PIL.Image.open(io.BytesIO(encoded_jpg))
        finally:
            # Delete the generated temp file even if reading it back failed.
            os.remove(new_jpeg_path)

    width = image.width
    height = image.height
    filename = data_dict['filename']
    source_id = data_dict.get('source_id', filename)
    sha256 = hashlib.sha256(encoded_jpg).hexdigest()
    image_format = 'jpeg'  # renamed from ``format`` to stop shadowing the builtin

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    # Original note (translated): at least one is required.
    for obj in data_dict.get('objects', []):
        xmin.append(float(obj['xmin']) / width)
        ymin.append(float(obj['ymin']) / height)
        xmax.append(float(obj['xmax']) / width)
        ymax.append(float(obj['ymax']) / height)
        classes_text.append(obj['text'].encode('utf8'))
        classes.append(obj['label'])
        difficult_obj.append(obj.get('difficult', 0))
        truncated.append(obj.get('truncated', 0))
        poses.append(obj.get('pose', 'Unspecified').encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(source_id.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(sha256.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
def dict_to_tf_example(data, label_map_dict, example,
                       ignore_difficult_instances=False):
    """Convert a PASCAL-style dict to a tf.Example, returning 0 on any failure.

    The image is ``<example without extension>.jpg`` and every box is
    labelled with the name of the directory that contains that image.

    Args:
      data: Mapping with 'filename', 'size' ('width', 'height') and 'object',
        a list of entries with a 'bndbox' of pixel 'xmin', 'ymin', 'xmax',
        'ymax'.
      label_map_dict: Maps the class (directory) name to an integer id.
      example: Path of the annotation file; only its directory and base name
        are used.
      ignore_difficult_instances: Accepted but not used by this function.

    Returns:
      The tf.train.Example, or the integer 0 if the image is missing, is not a
      JPEG, or any step raises.
    """
    try:
        dirname = os.path.dirname(example)
        basename = os.path.basename(example)
        filename = os.path.splitext(basename)[0]
        img_path = '{}.jpg'.format(os.path.join(dirname, filename))
        # Class name is the directory that holds the image.
        object_name = img_path.split('/')[-2]
    except Exception:
        print('error')
        return 0

    # A missing image yields 0 explicitly; this used to happen only through a
    # NameError swallowed further down.
    if not os.path.isfile(img_path):
        return 0

    try:
        with tf.gfile.GFile(img_path, 'rb') as fid:
            encoded_jpg = fid.read()
        image = PIL.Image.open(io.BytesIO(encoded_jpg))
    except Exception:
        print('error')
        return 0

    if image.format != 'JPEG':
        print('image format is not jpeg')
        return 0

    try:
        width = int(data['size']['width'])
        height = int(data['size']['height'])

        xmin = []
        ymin = []
        xmax = []
        ymax = []
        classes = []
        classes_text = []
        for obj in data['object']:
            bndbox = obj['bndbox']
            xmin.append(float(bndbox['xmin']) / width)
            ymin.append(float(bndbox['ymin']) / height)
            xmax.append(float(bndbox['xmax']) / width)
            ymax.append(float(bndbox['ymax']) / height)
            # The bytes feature needs bytes; a plain str here made the builder
            # raise on Python 3, and the blanket handler turned that into 0.
            classes_text.append(object_name.encode('utf8'))
            classes.append(label_map_dict[object_name])

        filename_bytes = data['filename'].encode('utf8')
        return tf.train.Example(features=tf.train.Features(
            feature={
                'image/height': dataset_util.int64_feature(height),
                'image/width': dataset_util.int64_feature(width),
                'image/filename': dataset_util.bytes_feature(filename_bytes),
                'image/source_id': dataset_util.bytes_feature(filename_bytes),
                'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                'image/format': dataset_util.bytes_feature(b'jpg'),
                'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmin),
                'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmax),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymin),
                'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymax),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
            }))
    except Exception:
        # Best-effort contract: callers get 0 for any malformed record.
        # ``except Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit through.
        return 0
def _dict_to_tf_example(data, dataset_directory,
                        image_subdirectory='JPEGImages'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data. Objects marked as difficult are skipped.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running recursive_parse_xml_to_dict)
      dataset_directory: Path to root directory holding PASCAL dataset
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example. Besides the usual detection fields it
        stores 'image/object/count' and 'image/object/y_true', the flattened
        result of ``preprocess_true_boxes`` (a helper defined elsewhere).

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG
    """
    # Get full image path
    img_path = os.path.join(data['folder'], image_subdirectory,
                            data['filename'])
    full_path = os.path.join(dataset_directory, img_path)

    # Read the encoded jpg and verify its format
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])
    depth = int(data['size']['depth'])
    shape = [height, width, depth]

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    difficult = []
    truncated = []
    boxes = []

    # For each detection in the image
    for obj in data['object']:
        difficult_b = bool(int(obj['difficult']))
        # Ignore difficult objects for now
        if difficult_b:
            continue
        difficult.append(int(difficult_b))
        truncated.append(int(obj.get('truncated') or 0))

        # Classes
        c = int(VOC_LABELS[obj['name']][0])
        classes.append(c)
        classes_text.append(obj['name'].encode('utf8'))

        # Normalized bounding boxes
        bndbox = obj['bndbox']
        box_xmin = float(bndbox['xmin']) / width
        box_ymin = float(bndbox['ymin']) / height
        box_xmax = float(bndbox['xmax']) / width
        box_ymax = float(bndbox['ymax']) / height
        xmin.append(box_xmin)
        ymin.append(box_ymin)
        xmax.append(box_xmax)
        ymax.append(box_ymax)

        boxes.append(create_box(box_xmin, box_ymin, box_xmax, box_ymax, c))

    # Computed after the loop so it is defined even when every object was
    # skipped.
    object_count = len(xmin)

    boxes = np.array(boxes)
    # Process the bounding boxes into a format that YOLO expects
    y_true = preprocess_true_boxes(boxes)
    # TFRecords can only store flat arrays
    y_true = y_true.reshape((-1))

    features = {
        # Image file
        'image/filename': bytes_feature(data['filename'].encode('utf8')),
        'image/encoded': bytes_feature(encoded_jpg),
        'image/format': bytes_feature('jpeg'.encode('utf8')),
        'image/key/sha256': bytes_feature(key.encode('utf8')),
        'image/source_id': bytes_feature(data['filename'].encode('utf8')),
        # Image features
        'image/width': int64_feature(width),
        'image/height': int64_feature(height),
        'image/channels': int64_feature(depth),
        'image/shape': int64_list_feature(shape),
        # Detection features
        'image/object/difficult': int64_list_feature(difficult),
        'image/object/truncated': int64_list_feature(truncated),
        # Classes
        'image/object/class/text': bytes_list_feature(classes_text),
        'image/object/class/label': int64_list_feature(classes),
        # Bounding box
        'image/object/bbox/xmin': float_list_feature(xmin),
        'image/object/bbox/xmax': float_list_feature(xmax),
        'image/object/bbox/ymin': float_list_feature(ymin),
        'image/object/bbox/ymax': float_list_feature(ymax),
        'image/object/count': int64_feature(object_count),
        'image/object/y_true': float_list_feature(y_true),
    }
    example = tf.train.Example(features=tf.train.Features(feature=features))
    return example
def get_examples(img_path):
    """Build one tf.Example per line of the label file next to ``img_path``.

    The label file has the image's path with the extension replaced by
    ``.txt``. Each non-blank line holds whitespace-separated fields:
    width, height, file name, image format, xmin, xmax, ymin, ymax,
    class id, class name. Box values are stored as read; this function does
    not normalise them.

    Args:
      img_path: Path of the JPEG image.

    Returns:
      ``(True, examples)`` with a list of tf.train.Example (one per label
      line), or ``(False, None)`` if the label file is missing or the image is
      not a JPEG.
    """
    label_path = os.path.splitext(img_path)[0] + '.txt'
    if not os.path.exists(label_path):
        return False, None

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        print("file format error " + img_path)
        return False, None
    key = hashlib.sha256(encoded_jpg).hexdigest()

    examples = []
    # ``with`` closes the label file; it was previously left open.
    with open(label_path) as label_file:
        for line in label_file:
            data = line.split()
            if not data:
                # Blank line (e.g. a trailing newline); used to raise
                # IndexError.
                continue

            width = data[0]
            height = data[1]
            file_name = data[2]
            # data[3] is the image format column; the stored format is always
            # 'jpeg', so it is not read.
            xmin = [float(data[4])]
            xmax = [float(data[5])]
            ymin = [float(data[6])]
            ymax = [float(data[7])]
            classes = [int(data[8])]
            classes_text = [data[9].encode('utf8')]

            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height': dataset_util.int64_feature(int(height)),
                    'image/width': dataset_util.int64_feature(int(width)),
                    'image/filename':
                        dataset_util.bytes_feature(file_name.encode('utf8')),
                    'image/source_id':
                        dataset_util.bytes_feature(file_name.encode('utf8')),
                    'image/key/sha256':
                        dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format':
                        dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmin),
                    'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmax),
                    'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymin),
                    'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymax),
                    'image/object/class/text':
                        dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                }))
            examples.append(example)

    return True, examples
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data (x by image width, y by image height).

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). The 'object' key
        may be absent when the image has no annotated objects.
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: String specifying the directory holding the actual
        image data; it is joined with data['filename'] to locate the image.
      ignore_difficult_instances: Accepted for interface compatibility but
        currently unused: this converter does not read a per-object
        'difficult' flag and writes 0 for every object (default: False).

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG.
      KeyError: if an object's name is not a key of label_map_dict.
    """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # An image without annotations has no 'object' key; treat it as having
    # zero boxes instead of failing with KeyError.
    for obj in data.get('object', []):
        # This converter does not read difficult/truncated/pose from the
        # annotation; fixed placeholder values are written instead.
        difficult_obj.append(0)

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)

        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(0)
        poses.append('Unspecified'.encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
                dataset_util.int64_feature(height),
            'image/width':
                dataset_util.int64_feature(width),
            'image/filename':
                dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
                dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
                dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
                dataset_util.bytes_feature(encoded_jpg),
            'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymax),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            'image/object/difficult':
                dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
                dataset_util.int64_list_feature(truncated),
            'image/object/view':
                dataset_util.bytes_list_feature(poses),
        }))
    return example
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

    Rows whose LabelName is not in label_map are dropped. The remaining rows
    are split into box annotations (YMin is set) and image-level label
    annotations (YMin is null).

    Args:
      annotations_data_frame: Data frame containing the annotations for a
        single image. The image id is taken from the first row's ImageID.
      label_map: String to integer label map.
      encoded_image: The encoded image string

    Returns:
      The populated TF Example. An Example is always returned; when no label
      in the data frame is present in label_map, the per-object features are
      simply empty.
    """
    fields = standard_fields.TfExampleFields

    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]
    has_box = ~filtered_data_frame.YMin.isnull()
    filtered_data_frame_boxes = filtered_data_frame[has_box]
    filtered_data_frame_labels = filtered_data_frame[~has_box]
    image_id = annotations_data_frame.ImageID.iloc[0]

    # `.values` / builtin `float` replace `Series.as_matrix()` / `np.float`,
    # which newer pandas / NumPy releases no longer provide; results are the
    # same on older releases.
    feature_map = {
        fields.object_bbox_ymin:
            dataset_util.float_list_feature(
                filtered_data_frame_boxes.YMin.values.astype(float)),
        fields.object_bbox_xmin:
            dataset_util.float_list_feature(
                filtered_data_frame_boxes.XMin.values.astype(float)),
        fields.object_bbox_ymax:
            dataset_util.float_list_feature(
                filtered_data_frame_boxes.YMax.values.astype(float)),
        fields.object_bbox_xmax:
            dataset_util.float_list_feature(
                filtered_data_frame_boxes.XMax.values.astype(float)),
        fields.object_class_text:
            dataset_util.bytes_list_feature(
                filtered_data_frame_boxes.LabelName.map(
                    lambda x: x.encode()).values),
        fields.object_class_label:
            dataset_util.int64_list_feature(
                filtered_data_frame_boxes.LabelName.map(
                    lambda x: label_map[x]).values.astype(np.int64)),
        fields.filename:
            dataset_util.bytes_feature('{}.jpg'.format(image_id).encode()),
        fields.source_id:
            dataset_util.bytes_feature(image_id.encode()),
        fields.image_encoded:
            dataset_util.bytes_feature(encoded_image),
    }

    # Optional per-box attribute columns; only written when present.
    if 'IsGroupOf' in filtered_data_frame.columns:
        feature_map[fields.object_group_of] = dataset_util.int64_list_feature(
            filtered_data_frame_boxes.IsGroupOf.values.astype(int))
    if 'IsOccluded' in filtered_data_frame.columns:
        feature_map[fields.object_occluded] = dataset_util.int64_list_feature(
            filtered_data_frame_boxes.IsOccluded.values.astype(int))
    if 'IsTruncated' in filtered_data_frame.columns:
        feature_map[fields.object_truncated] = dataset_util.int64_list_feature(
            filtered_data_frame_boxes.IsTruncated.values.astype(int))
    if 'IsDepiction' in filtered_data_frame.columns:
        feature_map[fields.object_depiction] = dataset_util.int64_list_feature(
            filtered_data_frame_boxes.IsDepiction.values.astype(int))

    # Image-level labels (rows without a box).
    if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
        feature_map[fields.image_class_label] = dataset_util.int64_list_feature(
            filtered_data_frame_labels.LabelName.map(
                lambda x: label_map[x]).values)
        # Store the Feature itself (no trailing comma, which would wrap it in
        # a tuple) and encode the text to bytes like the box-level class text.
        feature_map[fields.image_class_text] = dataset_util.bytes_list_feature(
            filtered_data_frame_labels.LabelName.map(
                lambda x: x.encode()).values)

    return tf.train.Example(features=tf.train.Features(feature=feature_map))