def create_tf_example(group, path):
    """Build a tf.train.Example for one image and all of its boxes.

    Args:
        group: object exposing ``filename`` (image file name) and ``object``
            (iterated with ``iterrows()``; each row provides ``xmin``,
            ``xmax``, ``ymin``, ``ymax`` and ``class``).
        path: directory that contains the image file.

    Returns:
        tf.train.Example holding the raw image bytes, the image size and
        the box coordinates divided by the image width / height.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # The image is decoded only to learn its dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, box in group.object.iterrows():
        xmins.append(box['xmin'] / width)
        xmaxs.append(box['xmax'] / width)
        ymins.append(box['ymin'] / height)
        ymaxs.append(box['ymax'] / height)
        classes_text.append(box['class'].encode('utf8'))
        classes.append(class_text_to_int(box['class']))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def group_to_tf_record(point, image_directory):
    """Convert one annotated data point into a tf.train.Example.

    Args:
        point: dict with an ``id`` string and an ``annotations`` list; each
            annotation provides ``x0``, ``x1``, ``y0``, ``y1``,
            ``class_num`` and ``label``.
        image_directory: directory that contains the image files.

    Returns:
        The tf.train.Example, or None when the image cannot be opened or
        read (missing or corrupt images are skipped on a best-effort basis).
    """
    image_id = point['id']
    # Ids starting with 'frame' are stored as PNG, everything else as JPEG.
    if image_id.startswith('frame'):
        filename = os.path.join(image_directory, image_id + '.png')
        image_format = b'png'
    else:
        filename = os.path.join(image_directory, image_id + '.jpg')
        image_format = b'jpeg'

    try:
        # The context manager releases the file handle PIL keeps open.
        with Image.open(filename) as image:
            width, height = image.size
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_image = fid.read()
    except Exception:
        # Deliberately broad, but unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return None

    key = hashlib.sha256(encoded_image).hexdigest()

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    class_nums, class_ids = [], []
    for anno in point['annotations']:
        xmins.append(float(anno['x0']))
        xmaxs.append(float(anno['x1']))
        ymins.append(float(anno['y0']))
        ymaxs.append(float(anno['y1']))
        class_nums.append(anno['class_num'])
        class_ids.append(anno['label'].encode('utf8'))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(image_id.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(class_ids),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_nums),
    }))
    return tf_example
def create_tf_example(example):
    """Build a tf.train.Example from one annotated image description.

    The image size is fixed at 800x600 (noted in the original as the
    "Udacity real data set") instead of being read from the file.

    Args:
        example: dict with ``filename`` (path of the image) and
            ``annotations``; each annotation provides ``xmin``, ``ymin``,
            ``x_width``, ``y_height`` and ``class``.

    Returns:
        tf.train.Example with the image bytes and one entry per box.
    """
    height = 600  # Image height
    width = 800  # Image width

    filename = example['filename'].encode()
    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()
    image_format = 'jpg'.encode()

    # One entry per box; coordinates are divided by the fixed image size.
    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for box in example['annotations']:
        left, top = box['xmin'], box['ymin']
        xmins.append(float(left / width))
        xmaxs.append(float((left + box['x_width']) / width))
        ymins.append(float(top / height))
        ymaxs.append(float((top + box['y_height']) / height))
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))
    return tf_example
def dict_to_tf_example(data):
    """Convert a dict describing one labelled image to a tf.Example proto.

    Args:
      data: dict with keys ``filename`` (path of the image file),
        ``class_id`` (integer label) and ``class_text`` (label name).

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image is not a PNG, JPEG or BMP.
    """
    def _as_bytes(value):
        # Protobuf bytes fields reject text strings on Python 3; values that
        # are already bytes (including Python 2 ``str``) pass through.
        return value if isinstance(value, bytes) else value.encode('utf8')

    img_path = data['filename']
    # Binary mode is required: GFile's default text mode decodes the content.
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_img = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_img))

    format_names = {'PNG': 'png', 'JPEG': 'jpeg', 'BMP': 'bmp'}
    if image.format not in format_names:
        raise ValueError('Image format not PNG/JPEG/BMP')
    img_format = format_names[image.format]

    key = hashlib.sha256(encoded_img).hexdigest()
    (width, height) = image.size
    class_id = data['class_id']
    class_text = data['class_text']
    filename = _as_bytes(data['filename'])

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            # The channel count is written as a constant, not read from
            # the image.
            'image/channels': dataset_util.int64_feature(3),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/key/sha256': dataset_util.bytes_feature(_as_bytes(key)),
            'image/encoded': dataset_util.bytes_feature(encoded_img),
            'image/format':
                dataset_util.bytes_feature(_as_bytes(img_format)),
            'image/class/text':
                dataset_util.bytes_feature(_as_bytes(class_text)),
            'image/class/label': dataset_util.int64_feature(class_id),
        }))
    return example
def main(unused_argv):
    """Write one tf.Example per line of the tags file to a TFRecord file.

    Each non-empty line of ``FLAGS.tags_file_path`` is
    ``<filename> <groundtruth_text>``. The image is read from
    ``FLAGS.images_path`` and stored with its height, width, file name, the
    transcript and the transcript with every '.' removed. Images that cannot
    be read are reported and skipped.

    Args:
        unused_argv: ignored.
    """
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    count = 0
    try:
        with open(FLAGS.tags_file_path) as fo:
            for line in fo:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no sample.
                    continue
                filename, groundtruth_text = line.split(' ')
                nop_gt = groundtruth_text.replace('.', '')
                image_path = os.path.join(FLAGS.images_path, filename)
                print("filename: {}, gt: {}, nopoint: {}".format(
                    filename, groundtruth_text, nop_gt))

                try:
                    image = cv2.imread(image_path)
                    if image is None:
                        # cv2.imread signals failure by returning None.
                        print('cannot read image: {}'.format(image_path))
                        continue
                    height, width = image.shape[:2]
                    with open(image_path, 'rb') as image_file:
                        image_bin = image_file.read()
                except Exception as e:
                    print(e)
                    continue

                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        fields.TfExampleFields.image_encoded:
                            dataset_util.bytes_feature(image_bin),
                        fields.TfExampleFields.height:
                            dataset_util.int64_feature(height),
                        fields.TfExampleFields.width:
                            dataset_util.int64_feature(width),
                        fields.TfExampleFields.filename:
                            dataset_util.bytes_feature(filename.encode()),
                        fields.TfExampleFields.transcript:
                            dataset_util.bytes_feature(
                                groundtruth_text.encode()),
                        fields.TfExampleFields.transcript2:
                            dataset_util.bytes_feature(nop_gt.encode()),
                    }))
                writer.write(example.SerializeToString())
                count += 1
                if count % 1000 == 0:
                    print(count)
    finally:
        # Close the record file even when a line fails to parse.
        writer.close()
    print('{} example finished!'.format(count))
def create_tf_example(filename):
    """Build a tf.train.Example whose single box is encoded in the file name.

    The base name (without extension) is split on '-'; its third field must
    look like ``<xmin>&<ymin>_<xmax>&<ymax>`` in pixels. The box is divided
    by the decoded image size and labelled 'vehicle plate' with id 2.

    Args:
        filename: path of the image; '/' is the expected path separator.

    Returns:
        tf.train.Example with the image bytes and the single box. The
        'image/source_id' and 'image/key/sha256' features are not written.
    """
    stem = filename.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    coordinates = stem.split('-')[2]
    leftUp, rightDown = [[int(eel) for eel in el.split('&')]
                         for el in coordinates.split('_')]
    xmin, ymin = leftUp
    xmax, ymax = rightDown

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decoded only to obtain the image dimensions.
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    height = image.height
    width = image.width

    ymins = [float(ymin) / height]
    xmins = [float(xmin) / width]
    ymaxs = [float(ymax) / height]
    xmaxs = [float(xmax) / width]
    labels_text = ['vehicle plate'.encode('utf8')]
    labels = [2]

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(labels_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(labels),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def make_example(filename, label):
    """Pair the raw bytes of an image file with an integer label.

    Args:
        filename: path of the image file to read.
        label: integer stored under the 'label' key.

    Returns:
        tf.train.Example with 'label' (int64) and 'image' (bytes) features.
    """
    with tf.gfile.GFile(filename, 'rb') as fid:
        raw_bytes = fid.read()

    feature = {
        'label': dataset_util.int64_feature(label),
        'image': dataset_util.bytes_feature(raw_bytes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tfdatapoint(file_loc, file, labels):
    """Build a tf.train.Example from an image and its text label file.

    The image is read from ``<file_loc>/images/<file>``; the labels come
    from ``<file_loc>/labels/<name>.txt`` where ``<name>`` is the part of
    ``file`` before its first '.'. Each label row has five numbers
    ``class x_center y_center width height``, converted to min/max corners.

    Args:
        file_loc: dataset root containing ``images`` and ``labels``.
        file: image file name inside ``images``.
        labels: sequence mapping a class index to its name.

    Returns:
        tf.train.Example with the image bytes and one entry per label row.
    """
    image_path = os.path.join(file_loc, 'images', file)
    # Both handles are closed as soon as the data has been read.
    with Image.open(image_path) as img:
        (width, height) = img.size
    with tf.io.gfile.GFile(image_path, "rb") as fid:
        encoded = bytes(fid.read())
    image_format = b'png'

    filename = file.split('.')[0]
    data = np.genfromtxt(os.path.join(file_loc, 'labels', filename + '.txt'))
    # A single-row file is loaded as a 1-D array; force (n_boxes, 5).
    data = data.reshape(-1, 5)

    classes = [int(x) for x in data[:, 0]]
    classes_text = [labels[x].encode('utf8') for x in classes]
    half_w = data[:, 3] / 2.0
    half_h = data[:, 4] / 2.0
    xmins = data[:, 1] - half_w
    xmaxs = data[:, 1] + half_w
    ymins = data[:, 2] - half_h
    ymaxs = data[:, 2] + half_h

    tf_label_and_data = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(str.encode(filename)),
            'image/source_id':
                dataset_util.bytes_feature(str.encode(filename)),
            'image/encoded': dataset_util.bytes_feature(encoded),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_label_and_data
def create_tf_example(group, path):
    """Build a tf.train.Example for one image and its annotated boxes.

    Args:
        group: object exposing ``filename`` and ``object`` (iterated with
            ``iterrows()``; each row provides ``xmin``, ``xmax``, ``ymin``,
            ``ymax`` and ``class``).
        path: directory that contains the image file.

    Returns:
        tf.train.Example with the image bytes, its size and the boxes.
    """
    # Read the encoded image from disk.
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # Decode in memory just to obtain the image size.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    # NOTE(review): coordinates are stored exactly as found in the rows (not
    # divided by width/height) and the class *names* are written under
    # 'image/object/class/label' as bytes - confirm the reader expects this.
    xmins, xmaxs, ymins, ymaxs, classes = [], [], [], [], []
    for _, box in group.object.iterrows():
        xmins.append(box['xmin'])
        xmaxs.append(box['xmax'])
        ymins.append(box['ymin'])
        ymaxs.append(box['ymax'])
        classes.append(box['class'].encode('utf8'))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/label': dataset_util.bytes_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def dict_to_coco_example(img_data):
    """Convert the dictionary describing one image to a tf.Example proto.

    Args:
        img_data: information about one image; the keys read are
            ``bboxes``, ``height``, ``width``, ``labels``, ``text``,
            ``pixel_data`` and ``file``.

    Returns:
        example: The converted tf.Example
    """
    # NOTE(review): xmin/ymin are taken from indices 2/3 and xmax/ymax from
    # indices 0/1 of each bbox - confirm this matches the producer's layout.
    xmin, xmax, ymin, ymax = [], [], [], []
    for box in img_data['bboxes']:
        xmin.append(box[2])
        xmax.append(box[0])
        ymin.append(box[3])
        ymax.append(box[1])

    feature = {
        'image/height': dataset_util.int64_feature(img_data['height']),
        'image/width': dataset_util.int64_feature(img_data['width']),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
            dataset_util.int64_list_feature(img_data['labels']),
        'image/object/class/text':
            dataset_util.bytes_list_feature(img_data['text']),
        'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        'image/object/class/file':
            dataset_util.bytes_feature(img_data['file'].encode('utf-8')),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example
def __create_tf_example(frame_data, sorted_label_list):
    """Re-encode one frame and package it with its boxes as a tf.Example.

    Args:
        frame_data: object with ``image`` (encoded bytes), ``format``,
            ``filename`` and ``bboxes_text`` attributes.
        sorted_label_list: label names; a label's class id is its position
            in this list.

    Returns:
        Tuple ``(tf_example, label_counter_for_frame, is_negative)``: the
        example, a Counter of the labels in the frame, and whether the
        frame has no boxes at all.
    """
    def _unit_clamp(value):
        # Cap a normalized coordinate to the [0, 1] range.
        return max(min(value, 1), 0)

    im = PIL.Image.open(io.BytesIO(frame_data.image))

    # PIL names the JPEG codec 'JPEG'; other formats are just upper-cased.
    if frame_data.format == 'jpg':
        pil_format = 'JPEG'
    else:
        pil_format = frame_data.format.upper()
    buffer = io.BytesIO()
    im.save(buffer, format=pil_format)

    height = im.height
    width = im.width
    encoded_image_data = buffer.getvalue()

    rects, labels = bbox_writer.convert_text_to_rects_and_labels(
        frame_data.bboxes_text)

    # Normalized coordinates, one per box.
    xmins = [_unit_clamp(rect[0] / width) for rect in rects]   # left x
    xmaxs = [_unit_clamp(rect[2] / width) for rect in rects]   # right x
    ymins = [_unit_clamp(rect[1] / height) for rect in rects]  # top y
    ymaxs = [_unit_clamp(rect[3] / height) for rect in rects]  # bottom y

    classes_txt = [label.encode('utf-8') for label in labels]
    label_to_id_dict = {
        label: i for i, label in enumerate(sorted_label_list)}
    class_ids = [label_to_id_dict[label] for label in labels]

    filename = frame_data.filename.encode('utf-8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format':
            dataset_util.bytes_feature(frame_data.format.encode('utf-8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_txt),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))

    label_counter_for_frame = collections.Counter(labels)
    is_negative = len(rects) == 0
    return tf_example, label_counter_for_frame, is_negative
def create_tf_example(height, width, filename, encoded_image_data,
                      image_format, xmins, xmaxs, ymins, ymaxs, classes_text,
                      classes):
    """Assemble a tf.train.Example from already-prepared field values.

    Args:
        height: image height.
        width: image width.
        filename: image file name; also written as the source id.
        encoded_image_data: encoded image bytes.
        image_format: b'jpeg' or b'png'.
        xmins: normalized left x coordinates, one per box.
        xmaxs: normalized right x coordinates, one per box.
        ymins: normalized top y coordinates, one per box.
        ymaxs: normalized bottom y coordinates, one per box.
        classes_text: class names, one per box.
        classes: integer class ids, one per box.

    Returns:
        The assembled tf.train.Example.
    """
    feature = {}
    feature['image/height'] = dataset_util.int64_feature(height)
    feature['image/width'] = dataset_util.int64_feature(width)
    feature['image/filename'] = dataset_util.bytes_feature(filename)
    feature['image/source_id'] = dataset_util.bytes_feature(filename)
    feature['image/encoded'] = dataset_util.bytes_feature(encoded_image_data)
    feature['image/format'] = dataset_util.bytes_feature(image_format)
    feature['image/object/bbox/xmin'] = dataset_util.float_list_feature(xmins)
    feature['image/object/bbox/xmax'] = dataset_util.float_list_feature(xmaxs)
    feature['image/object/bbox/ymin'] = dataset_util.float_list_feature(ymins)
    feature['image/object/bbox/ymax'] = dataset_util.float_list_feature(ymaxs)
    feature['image/object/class/text'] = dataset_util.bytes_list_feature(
        classes_text)
    feature['image/object/class/label'] = dataset_util.int64_list_feature(
        classes)

    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))
    return tf_example
def dict_to_tf_example(data, label):
    """Pair an encoded image with its label mask in a tf.Example.

    Args:
        data: path of the image file; its bytes are stored unchanged.
        label: path of the label image, read with ``cv2.imread`` and
            converted by ``image2label``.

    Returns:
        The tf.train.Example, or None when the label image is smaller than
        ``vgg_16.default_image_size`` in either dimension.
    """
    with open(data, 'rb') as inf:
        encoded_data = inf.read()

    img_label = cv2.imread(label)
    encoded_label = image2label(img_label).astype(np.uint8).tobytes()

    height, width = img_label.shape[0], img_label.shape[1]
    # Guarantee the image is large enough for the final random crop.
    min_side = vgg_16.default_image_size
    if height < min_side or width < min_side:
        return None

    # Base name: everything after the last '/'.
    fname = data.rsplit('/', 1)[-1]

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fname.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_data),
        'image/label': dataset_util.bytes_feature(encoded_label),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }
    return tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
def create_tf_example(input_features, image_dir='/'):
    """Re-encode an image as JPEG and store it with its integer label.

    Args:
        input_features: dict with ``image`` (path relative to ``image_dir``)
            and ``label`` (integer class label).
        image_dir: directory that ``input_features['image']`` is joined to.

    Returns:
        tf.train.Example with 'image/encoded', 'image/label' and
        'image/format' features.
    """
    image_path = os.path.join(image_dir, input_features['image'])
    label = input_features['label']

    image = cv2.imread(image_path)
    # imencode returns (success_flag, buffer); ``tobytes`` replaces the
    # deprecated ``ndarray.tostring`` and yields the same bytes.
    encoded_jpg = cv2.imencode('.jpg', image)[1].tobytes()

    feature_dict = {
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/label': dataset_util.int64_feature(label),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def create_tf_example(csv, img_dir):
    """Build a tf.train.Example for one CSV row holding a single box.

    Args:
        csv: sequence ``[file_name, x1, y1, x2, y2, class_index]``.
        img_dir: directory that contains the image file.

    Returns:
        tf.train.Example with the image bytes and the box divided by the
        image width / height.
    """
    img_fname = csv[0]
    x1, y1, x2, y2 = [int(value) for value in csv[1:-1]]
    class_index = int(csv[-1])
    class_name = config.CLASS_NAMES[class_index].encode('utf8')

    with tf.gfile.GFile(os.path.join(img_dir, img_fname), 'rb') as fid:
        encoded_jpg = fid.read()
    # Decoded only to obtain the image dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = img_fname.encode('utf8')
    image_format = b'jpg'

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature([x1 / width]),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature([x2 / width]),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature([y1 / height]),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature([y2 / height]),
        'image/object/class/text':
            dataset_util.bytes_list_feature([class_name]),
        'image/object/class/label':
            dataset_util.int64_list_feature([class_index]),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))
    return tf_example
def dict_to_tf_example(data, label):
    """Pair an encoded image with its label mask in a tf.Example.

    Args:
        data: path of the image file; its bytes are stored unchanged.
        label: path of the label image, read with ``cv2.imread`` and
            converted by ``image2label``.

    Returns:
        The tf.train.Example, or None when the label image is smaller than
        ``vgg_16.default_image_size`` in either dimension.
    """
    print("data----", data)
    with open(data, 'rb') as inf:
        encoded_data = inf.read()

    img_label = cv2.imread(label)
    img_mask = image2label(img_label)
    encoded_label = img_mask.astype(np.uint8).tobytes()

    height, width = img_label.shape[0], img_label.shape[1]
    # Guarantee the image is large enough for the final random crop.
    if (height < vgg_16.default_image_size
            or width < vgg_16.default_image_size):
        return None

    # Raw string: '\/' in a normal literal is an invalid escape sequence.
    fname = re.split(r'/+', data)[-1]

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fname.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_data),
        'image/label': dataset_util.bytes_feature(encoded_label),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def dict_to_coco_example(img_data):
    """Convert the dictionary describing one image to a tf.Example proto.

    Args:
        img_data: information about one image; the keys read are ``id``
            (integer), ``caption`` (list of byte strings) and
            ``pixel_data`` (encoded image bytes).

    Returns:
        example: The converted tf.Example
    """
    feature = {
        'image/id': dataset_util.int64_feature(img_data['id']),
        'image/caption':
            dataset_util.bytes_list_feature(img_data['caption']),
        'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example
def _encode_image_to_tfrecord(image_path, category_id):
    """Wrap an image file and its category id in a tf.train.Example.

    Args:
        image_path: path of the image; the text after the last '/' is
            stored as the image name.
        category_id: integer category of the image.

    Returns:
        tf.train.Example with 'image_name', 'encoded_image', 'category_id'
        and 'format' features.
    """
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    image_name = image_path.split('/')[-1].encode('utf8')
    features = tf.train.Features(feature={
        'image_name': dataset_util.bytes_feature(image_name),
        'encoded_image': dataset_util.bytes_feature(encoded_jpg),
        'category_id': dataset_util.int64_feature(category_id),
        'format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
    })
    return tf.train.Example(features=features)
def json_to_record(j):
    """Convert one JSON annotation record into a tf.train.Example.

    Args:
        j: dict with ``image_size`` (a one-element list holding ``height``
            and ``width``), ``categories``, ``annotations`` (each with a
            ``class_id``) and ``file`` (path of the image).

    Returns:
        tf.train.Example with the image bytes and one box per annotation,
        divided by the image width / height.
    """
    assert (len(j["image_size"]) == 1)
    assert (len(j["categories"]) == len(j["annotations"]))

    size = j["image_size"][0]
    height = size["height"]
    width = size["width"]
    filename = os.path.basename(j["file"]).encode("utf8")

    with tf.gfile.GFile(j["file"], "rb") as fid:
        encoded_image_data = fid.read()
    image_format = b'jpeg'

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for annot in j["annotations"]:
        classes_text.append(class_id_to_name(annot["class_id"]).encode("utf8"))
        # class_ids are indexed by 1 for tensorflow
        classes.append(annot["class_id"] + 1)
        corners = get_box_corners(annot)
        xmins.append(corners["xmin"] / width)
        xmaxs.append(corners["xmax"] / width)
        ymins.append(corners["ymin"] / height)
        ymaxs.append(corners["ymax"] / height)

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))
    return tf_example
def create_tf_example(image, annotations_list, image_dir, category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url',
        u'height', u'width', u'date_captured', u'flickr_url', u'id']
      annotations_list: list of dicts with keys: [u'segmentation', u'area',
        u'iscrowd', u'image_id', u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset
        are given as [x, y, width, height] tuples using absolute coordinates
        where x, y represent the top-left (0-indexed) corner. This function
        converts to the format expected by the Tensorflow Object Detection
        API (which is [ymin, xmin, ymax, xmax] with coordinates normalized
        relative to image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed by
        the 'id' field of each category. See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks (PNG
        encoded) in the result. default: False.

    Returns:
      key: SHA-256 hex digest of the encoded image bytes.
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored.

    Note:
      The image bytes are stored as read; this function does not check
      that they are a valid JPEG.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Skip degenerate boxes and boxes that extend past the image.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height,
                image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                # Collapse the per-polygon masks into one mask.
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
        # The ids were collected but never emitted; write them alongside
        # the names so readers without a label map still get class ids.
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
def _create_tf_entry(self, categories, img, label, filename, annotations):
    """Build a tf.train.Example for one image carrying a single label.

    Args:
        categories: list of label names; the class id written is the
            label's position in this list plus one.
        img: image object providing ``size`` and ``save``; re-encoded as
            JPEG in memory.
        label: label name shared by every box of this image.
        filename: image file name.
        annotations: objects whose ``data`` is a Rectangle or a Polygon
            (its ``rect`` is used); any other type is ignored.

    Returns:
        The tf.train.Example, or None when no usable box remains.

    Raises:
        ImageMonkeyGeneralError: when a trimmed rectangle is still invalid
            for the image.
    """
    imageFormat = b'jpg'
    width, height = img.size

    jpeg_buffer = io.BytesIO()
    img.save(jpeg_buffer, format='JPEG')
    encodedImageData = jpeg_buffer.getvalue()

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    for annotation in annotations:
        # Only Rectangle annotations (or a Polygon's rect) are supported.
        rect = None
        if type(annotation.data) is Rectangle:
            rect = annotation.data
        elif type(annotation.data) is Polygon:
            rect = annotation.data.rect
        if rect is None:
            continue

        # Scale to the image dimensions in case the annotation exceeds the
        # image width/height.
        trimmed_rect = rect.trim(Rectangle(0, 0, width, height))
        if trimmed_rect.left < 0:
            raise ImageMonkeyGeneralError(
                "trimmed rect left dimension invalid! (<0)")
        if trimmed_rect.top < 0:
            raise ImageMonkeyGeneralError(
                "trimmed rect top dimension invalid! (<0)")
        if trimmed_rect.width < 0:
            raise ImageMonkeyGeneralError(
                "trimmed rect width dimension invalid! (<0)")
        if trimmed_rect.height < 0:
            raise ImageMonkeyGeneralError(
                "trimmed rect height dimension invalid! (<0)")
        if (trimmed_rect.left + trimmed_rect.width) > width:
            raise ImageMonkeyGeneralError(
                "bounding box width > image width!")
        if (trimmed_rect.top + trimmed_rect.height) > height:
            raise ImageMonkeyGeneralError(
                "bounding box height > image height!")

        xmin = trimmed_rect.left / float(width)
        xmax = (trimmed_rect.left + trimmed_rect.width) / float(width)
        ymin = trimmed_rect.top / float(height)
        ymax = (trimmed_rect.top + trimmed_rect.height) / float(height)

        # Sanity checks.
        if xmin > xmax:
            raise ImageMonkeyGeneralError("xmin > xmax!")
        if ymin > ymax:
            raise ImageMonkeyGeneralError("ymin > ymax!")

        # Skip bounding boxes that are all zero.
        if (xmin == 0) and (xmax == 0) and (ymin == 0) and (ymax == 0):
            continue

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)

    # Images without a usable annotation are skipped; the four lists always
    # have the same length, so checking one is enough.
    if not xmins:
        return None

    box_count = len(xmins)
    classes = [(categories.index(label) + 1)] * box_count  # ids start at 1
    labels = [label.encode('utf8')] * box_count

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode()),
        'image/source_id': dataset_util.bytes_feature(filename.encode()),
        'image/encoded': dataset_util.bytes_feature(encodedImageData),
        'image/format': dataset_util.bytes_feature(imageFormat),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(labels),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))
    return tf_example
def create_tf_example(filename, label_file):
    """Build a ``tf.train.Example`` from an image and a space-separated label file.

    The label file is read with ``csv.reader(delimiter=' ')``. Its first row
    is skipped as a header; in every other row columns 0-3 are read as
    ``xmin ymin xmax ymax`` in pixels and the last column as the class name.

    Args:
        filename: Path of the image; also stored as ``image/filename`` and
            ``image/source_id``.
        label_file: Path of the label file.

    Returns:
        The ``tf.train.Example`` with boxes normalized by the image size.

    Raises:
        IOError: if OpenCV cannot read the image.
    """

    def to_bytes(value):
        # bytes features only accept ``bytes``; encode text, keep bytes as is.
        return value if isinstance(value, bytes) else value.encode('utf8')

    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('could not read image: {}'.format(filename))
    height, width = img.shape[:2]

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image_data = fid.read()
    image_format = b'jpg'

    xmins = []  # normalized left x coordinates (1 per box)
    xmaxs = []  # normalized right x coordinates (1 per box)
    ymins = []  # normalized top y coordinates (1 per box)
    ymaxs = []  # normalized bottom y coordinates (1 per box)
    classes_text = []  # class names as bytes (1 per box)
    classes = []  # integer class ids (1 per box)

    with open(label_file, 'r') as f:
        csvreader = csv.reader(f, delimiter=' ')
        next(csvreader, None)  # skip the header row
        for row in csvreader:
            if not row:
                continue  # tolerate blank lines
            name = row[-1]
            classes_text.append(to_bytes(name))
            classes.append(get_index(name))
            xmins.append(float(row[0]) / width)
            xmaxs.append(float(row[2]) / width)
            ymins.append(float(row[1]) / height)
            ymaxs.append(float(row[3]) / height)

    filename_bytes = to_bytes(filename)
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename_bytes),
            'image/source_id': dataset_util.bytes_feature(filename_bytes),
            'image/encoded': dataset_util.bytes_feature(encoded_image_data),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def xml_to_tf(path_input, path_output):
    """Convert every ``*.xml`` annotation in a directory into one TFRecord file.

    Fields are read from each XML file by position: ``root[1]`` is the image
    file name, ``root[4][0]``/``root[4][1]`` the width/height, and for every
    ``object`` element ``member[0]`` is the class name and ``member[4][0..3]``
    are ``xmin, ymin, xmax, ymax`` in pixels. The image itself is read from
    ``path_input``.

    Args:
        path_input: Directory holding the XML files and the images.
        path_output: Path of the TFRecord file to write.
    """
    print(path_output)
    writer = tf.io.TFRecordWriter(path_output)
    try:
        for entry in os.listdir(path_input):
            if not entry.endswith(".xml"):
                continue

            # os.path.join so path_input works with or without a trailing
            # separator (the image path below is built the same way).
            root = ET.parse(os.path.join(path_input, entry)).getroot()
            filename = root[1].text
            width = int(root[4][0].text)
            height = int(root[4][1].text)

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []
            classes = []
            for member in root.findall('object'):
                label = member[0].text
                box = member[4]
                xmins.append(int(box[0].text) / width)
                ymins.append(int(box[1].text) / height)
                xmaxs.append(int(box[2].text) / width)
                ymaxs.append(int(box[3].text) / height)
                classes_text.append(label.encode('utf8'))
                classes.append(class_text_to_int(label))

            image_path = os.path.join(path_input, '{}'.format(filename))
            with tf.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_jpg = fid.read()

            encoded_name = filename.encode('utf8')
            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename':
                        dataset_util.bytes_feature(encoded_name),
                    'image/source_id':
                        dataset_util.bytes_feature(encoded_name),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature(IMAGE_FORMAT),
                    'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text':
                        dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                }))
            writer.write(tf_example.SerializeToString())
    finally:
        # Close even when a file fails to parse so that the records written
        # so far are flushed.
        writer.close()

    output_path = os.path.join(os.getcwd(), path_output)
    print('Successfully created the TFRecords: {}'.format(output_path))
def dict_to_tf_example(data,
                       image_path,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='images'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). ``size`` and, per
        object, ``name`` and ``bndbox`` are required; ``difficult`` is
        optional and treated as 0 when missing or empty.
      image_path: Full path to image file.
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).
      image_subdirectory: Unused; kept so existing callers keep working.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by image_path is not a valid JPEG
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])
    # basename instead of split('/') so Windows-style paths work as well.
    filename = os.path.basename(image_path)

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []  # intentionally left empty: pose is not exported
    difficult_obj = []
    for obj in data.get('object', []):
        # Honour the flag when the annotation carries it, so that
        # ignore_difficult_instances actually has an effect; annotations
        # without the field behave as "not difficult".
        difficult = bool(int(obj.get('difficult') or 0))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))
        bndbox = obj['bndbox']
        xmin.append(float(bndbox['xmin']) / width)
        ymin.append(float(bndbox['ymin']) / height)
        xmax.append(float(bndbox['xmax']) / width)
        ymax.append(float(bndbox['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])
        truncated.append(0)  # truncation is not read from the annotation

    encoded_name = filename.encode('utf8')
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(encoded_name),
            'image/source_id': dataset_util.bytes_feature(encoded_name),
            'image/key/sha256':
                dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            'image/object/difficult':
                dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
                dataset_util.int64_list_feature(truncated),
            'image/object/view': dataset_util.bytes_list_feature(poses),
        }))
    return example
def background_tf_example(image_path, ):
    """Create a tf.Example for an image without any object annotation.

    All per-object features are written as empty lists.

    Args:
      image_path: Full path to image file

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the file is not identified as a JPEG image.
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        jpeg_bytes = fid.read()

    pil_image = PIL.Image.open(io.BytesIO(jpeg_bytes))
    if pil_image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    digest = hashlib.sha256(jpeg_bytes).hexdigest()
    name_bytes = image_path.split('/')[-1].encode('utf8')

    feature = {
        'image/height': dataset_util.int64_feature(pil_image.height),
        'image/width': dataset_util.int64_feature(pil_image.width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        # No objects: every per-box feature stays empty.
        'image/object/bbox/xmin': dataset_util.float_list_feature([]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([]),
        'image/object/class/text': dataset_util.bytes_list_feature([]),
        'image/object/class/label': dataset_util.int64_list_feature([]),
        'image/object/difficult': dataset_util.int64_list_feature([]),
        'image/object/truncated': dataset_util.int64_list_feature([]),
        'image/object/view': dataset_util.bytes_list_feature([]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Build a ``tf.train.Example`` from an image record and its annotations.

    Args:
        image: dict with the keys ``height``, ``width`` and ``filename``.
        annotations_list: list of dicts with pixel coordinates ``xmin``,
            ``xmax``, ``ymin``, ``ymax``, the integer ``label`` and the string
            ``label_text``.
        image_dir: Directory that contains ``image['filename']``.
        category_index: Unused; kept so existing callers keep working.
        include_masks: Unused; kept so existing callers keep working.

    Returns:
        The ``tf.train.Example`` with boxes normalized by the image size.
    """
    height = image['height']
    width = image['width']
    filename = image['filename']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Opening the bytes makes the function fail early when they are not a
    # recognizable image. The handle is not needed afterwards and, unlike
    # before, no longer shadows the ``image`` argument.
    PIL.Image.open(io.BytesIO(encoded_jpg))

    # splitext keeps the leading dot ('.jpg'); the format feature is meant to
    # hold the bare format name, so drop it.
    image_format = os.path.splitext(filename)[1].lstrip('.')

    xmins = []  # normalized left x coordinates (1 per box)
    xmaxs = []  # normalized right x coordinates (1 per box)
    ymins = []  # normalized top y coordinates (1 per box)
    ymaxs = []  # normalized bottom y coordinates (1 per box)
    classes_text = []  # class names as bytes (1 per box)
    classes = []  # integer class ids (1 per box)
    for annotation in annotations_list:
        xmins.append(annotation['xmin'] / width)
        xmaxs.append(annotation['xmax'] / width)
        ymins.append(annotation['ymin'] / height)
        ymaxs.append(annotation['ymax'] / height)
        classes_text.append(annotation['label_text'].encode('utf8'))
        classes.append(annotation['label'])

    encoded_name = filename.encode('utf8')
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(encoded_name),
            'image/source_id': dataset_util.bytes_feature(encoded_name),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format':
                dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def read_xml_make_tfrecord():
    """Write sharded train/valid/test TFRecords for the images in ``folder``.

    For every ``*.jpg`` in the module-level ``folder`` the matching mask
    ``mask/<name>.jpg`` is loaded, the longest contour of the
    morphologically closed mask gives the bounding box, and the pixels of
    the original mask equal to 255 become a PNG-encoded instance mask.
    Records are distributed by their position in the directory listing:
    the first 80% go to the 8 train shards, the next 10% to the valid
    shard and the rest to the test shard.
    """
    num_data = 8
    num_eval = int(num_data / 8)

    # Writers are kept in a local dict (instead of globals()) so that they
    # can be closed reliably once the conversion ends or fails.
    writers = {}
    try:
        for i in range(num_data):
            writers['train', i] = tensorflow.io.TFRecordWriter(
                'tfrecord/train/train.tfrecord-{:05d}-of-{:05d}'.format(
                    i, num_data))
        for i in range(num_eval):
            writers['test', i] = tensorflow.io.TFRecordWriter(
                'tfrecord/test/test.tfrecord-{:05d}-of-{:05d}'.format(
                    i, num_eval))
            writers['valid', i] = tensorflow.io.TFRecordWriter(
                'tfrecord/valid/valid.tfrecord-{:05d}-of-{:05d}'.format(
                    i, num_eval))

        img_names = os.listdir(folder)
        length = len(img_names)
        for number, img_name in enumerate(img_names):
            if img_name[-4:] != '.jpg':
                continue
            filename = img_name[:-4]
            image_path = os.path.join(folder, img_name)
            mask_path = 'mask/' + filename + '.jpg'

            img = cv2.imread(image_path)
            mask = cv2.imread(mask_path, 0)
            if img is None or mask is None:
                # cv2.imread returns None for unreadable/missing files.
                print('skipping {}: image or mask could not be read'.format(
                    img_name))
                continue
            height, width = img.shape[:2]

            mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,
                                    np.ones((5, 5), np.uint8))
            # findContours returns 3 values in OpenCV 3 and 2 values in
            # other versions; the contour list is the second-to-last item in
            # both cases.
            contours = cv2.findContours(mask, cv2.RETR_TREE,
                                        cv2.CHAIN_APPROX_SIMPLE)[-2]
            if not contours:
                print('skipping {}: no contour found in mask'.format(
                    img_name))
                continue

            # Bounding box of the contour with the most points.
            largest = max(contours, key=len)
            xs = [point[0][0] for point in largest]
            ys = [point[0][1] for point in largest]
            xmin = min(xs)
            xmax = max(xs)
            ymin = min(ys)
            ymax = max(ys)

            object_name = 'passport'
            pixel_val = 255

            with tensorflow.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_image_data = fid.read()
            key = hashlib.sha256(encoded_image_data).hexdigest()

            with tensorflow.io.gfile.GFile(mask_path, 'rb') as fid:
                encoded_mask_data = fid.read()
            mask_np = np.asarray(
                Image.open(io.BytesIO(encoded_mask_data)).convert('L'))
            mask_remapped = (mask_np == pixel_val).astype(np.uint8)
            output = io.BytesIO()
            Image.fromarray(mask_remapped).save(output, format='PNG')

            xmins = [xmin / width]
            xmaxs = [xmax / width]
            ymins = [ymin / height]
            ymaxs = [ymax / height]
            classes_text = [object_name.encode('utf8')]
            classes = [1]
            masks = [output.getvalue()]

            print(img_name)
            print(xmins)
            print(xmaxs)
            print(ymins)
            print(ymaxs)
            print(classes_text)
            print(classes)

            encoded_name = img_name.encode('utf8')
            example = tensorflow.train.Example(
                features=tensorflow.train.Features(
                    feature={
                        'image/height': dataset_util.int64_feature(height),
                        'image/width': dataset_util.int64_feature(width),
                        'image/filename':
                            dataset_util.bytes_feature(encoded_name),
                        'image/source_id':
                            dataset_util.bytes_feature(encoded_name),
                        'image/key/sha256':
                            dataset_util.bytes_feature(key.encode('utf8')),
                        'image/encoded':
                            dataset_util.bytes_feature(encoded_image_data),
                        'image/format':
                            dataset_util.bytes_feature(
                                'jpeg'.encode('utf8')),
                        'image/object/bbox/xmin':
                            dataset_util.float_list_feature(xmins),
                        'image/object/bbox/xmax':
                            dataset_util.float_list_feature(xmaxs),
                        'image/object/bbox/ymin':
                            dataset_util.float_list_feature(ymins),
                        'image/object/bbox/ymax':
                            dataset_util.float_list_feature(ymaxs),
                        'image/object/class/text':
                            dataset_util.bytes_list_feature(classes_text),
                        'image/object/class/label':
                            dataset_util.int64_list_feature(classes),
                        'image/object/mask':
                            dataset_util.bytes_list_feature(masks),
                    }))

            # Pick split and shard from the position in the listing. The
            # shard index is clamped because floating point rounding could
            # otherwise push it one past the last shard.
            if number < length * 0.8:
                split = 'train'
                shard = int(number / (length * 0.8) * num_data)
                shard = min(shard, num_data - 1)
            elif number < length * 0.9:
                split = 'valid'
                shard = int((number - length * 0.8) / (length * 0.1) *
                            num_data / 8)
                shard = min(shard, num_eval - 1)
            else:
                split = 'test'
                shard = int((number - length * 0.9) / (length * 0.1) *
                            num_data / 8)
                shard = min(shard, num_eval - 1)
            writers[split, shard].write(example.SerializeToString())
    finally:
        for record_writer in writers.values():
            record_writer.close()
def _keypoint_heatmap(keypoints, height, width, num_channels, sigma=10,
                      radius=36):
    """Return a (height, width, num_channels) float32 Gaussian heatmap.

    The i-th entry of ``keypoints`` (an ``(x, y)`` pixel pair) is drawn into
    channel ``i`` inside a square window of ``radius`` pixels, clipped to the
    image. Values are capped at 1.0.
    """
    heatmap = np.zeros([height, width, num_channels], np.float32)
    for channel, (cor_x, cor_y) in enumerate(keypoints):
        x0 = max(cor_x - radius, 0)
        x1 = min(cor_x + radius, width - 1)
        y0 = max(cor_y - radius, 0)
        y1 = min(cor_y + radius, height - 1)
        if x0 > x1 or y0 > y1:
            continue  # window lies completely outside the image
        dx = np.arange(x0, x1 + 1) - cor_x
        dy = np.arange(y0, y1 + 1) - cor_y
        dist2 = dy[:, None] ** 2 + dx[None, :] ** 2
        # 2.0 forces true division; with integer operands Python 2 would
        # floor the exponent and distort the Gaussian.
        heatmap[y0:y1 + 1, x0:x1 + 1, channel] += np.exp(
            -dist2 / (2.0 * sigma * sigma))
    heatmap[heatmap > 1] = 1.0
    return heatmap


def dict_to_tf_example(writer, data):
    """Write one keypoint-heatmap tf.Example per usable object of an image.

    The image is opened from ``data['folder']``/``data['filename']`` (with a
    fixed path prefix rewritten), converted to RGB if necessary, and stored
    as raw pixel bytes. Objects flagged ``difficult`` and objects with fewer
    than ``(FLAGS.kpNum + 1) / 2`` visible keypoints are skipped. For each
    remaining object the module-level counter ``_all_num`` is incremented
    and an example is written.

    Args:
      writer: Object with a ``write(bytes)`` method that receives the
        serialized examples.
      data: dict holding PASCAL-style XML fields for a single image; each
        object needs ``difficult``, ``name``, ``truncated``, ``pose`` and
        ``keypoints`` (with ``visible``, ``x`` and ``y`` lists).

    Returns:
      None; the examples are written to ``writer``.
    """
    global _all_num

    full_path = os.path.join(data['folder'], data['filename']).replace(
        '/home/data/usrs/jiangyz/images', '/data1/chenyf')
    ori_img = PIL.Image.open(full_path)
    if ori_img.mode != 'RGB':
        ori_img = ori_img.convert("RGB")
        print(full_path + ' is not a rgb image, converting...')
    ori_img_array = np.asarray(ori_img)
    w = int(ori_img_array.shape[1])
    h = int(ori_img_array.shape[0])

    # The same full image is stored for every object, so serialize it once.
    # tobytes() is the non-deprecated spelling of tostring().
    # NOTE(review): these are raw pixels although 'image/format' below says
    # 'jpeg' -- confirm the reader decodes them as raw data.
    img_raw = ori_img_array.tobytes()
    kp_num = FLAGS.kpNum
    encoded_name = data['filename'].encode('utf8')

    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if difficult:
            # Previously this paused on raw_input(), which only exists on
            # Python 2 and blocks batch conversion; just report and skip.
            print('there is a difficult instance.....')
            continue
        difficult_obj = int(difficult)

        classes_text = obj['name'].encode('utf8')
        classes = 0
        visibility = [int(x) for x in obj['keypoints']['visible']]
        truncated = int(obj['truncated'])
        poses = obj['pose'].encode('utf8')

        if sum(1 for flag in visibility if flag) < (kp_num + 1) / 2:
            continue

        # NOTE(review): only visible keypoints are collected, so heatmap
        # channel i is the i-th *visible* keypoint rather than keypoint id
        # i -- confirm this packing is intended.
        kp_cor = [(int(obj['keypoints']['x'][kp_id]),
                   int(obj['keypoints']['y'][kp_id]))
                  for kp_id in range(kp_num)
                  if visibility[kp_id] != 0]

        _all_num += 1

        heatmap = _keypoint_heatmap(kp_cor, h, w, kp_num, sigma=10, radius=36)

        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height': dataset_util.int64_feature(h),
                'image/width': dataset_util.int64_feature(w),
                'image/filename': dataset_util.bytes_feature(encoded_name),
                'image/source_id': dataset_util.bytes_feature(encoded_name),
                'image/encoded': dataset_util.bytes_feature(img_raw),
                'image/heatmap':
                    dataset_util.float_list_feature(heatmap.flatten()),
                'image/keypointnumber': dataset_util.int64_feature(kp_num),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/class/text':
                    dataset_util.bytes_feature(classes_text),
                'image/object/class/label':
                    dataset_util.int64_feature(classes),
                'image/object/difficult':
                    dataset_util.int64_feature(difficult_obj),
                'image/object/truncated':
                    dataset_util.int64_feature(truncated),
                'image/object/view': dataset_util.bytes_feature(poses),
            }))
        writer.write(example.SerializeToString())
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Turn one LVIS image record and its annotations into a tf.Example.

    Args:
      image: dict describing the image. The keys read here are u'height',
        u'width', u'coco_url', u'id', u'not_exhaustive_category_ids' and
        u'neg_category_ids'.
      image_dir: directory containing the image files. The file is looked up
        under the last two path components of ``image['coco_url']``.
      bbox_annotations: list of dicts with keys [u'segmentation', u'area',
        u'image_id', u'bbox', u'category_id', u'id']. Boxes are given as
        [x, y, width, height] in absolute pixels, with (x, y) the top-left
        corner; they are converted to coordinates normalized by the image
        size and clipped to [0, 1]. Boxes that end up with no positive
        extent are dropped.
      category_index: dict with LVIS category information keyed by the 'id'
        field of each category (see label_map_util.create_category_index).
      include_mask: Whether to include instance segmentation masks (PNG
        encoded) in the result. default: False.

    Returns:
      success: whether the conversion is successful
      filename: image filename (None on failure)
      example: The converted tf.Example (None on failure)
    """
    image_height = image['height']
    image_width = image['width']
    url_parts = image['coco_url'].split('/')
    filename = osp.join(*url_parts[-2:])
    image_id = image['id']
    not_exhaustive_ids = image['not_exhaustive_category_ids']
    neg_ids = image['neg_category_ids']

    full_path = os.path.join(image_dir, filename)
    if not tf.gfile.Exists(full_path):
        tf.logging.warn(f'image {full_path} not exists! skip')
        return False, None, None

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    sha256_hex = hashlib.sha256(encoded_jpg).hexdigest()

    feature_dict = {}
    feature_dict['image/height'] = dataset_util.int64_feature(image_height)
    feature_dict['image/width'] = dataset_util.int64_feature(image_width)
    feature_dict['image/filename'] = dataset_util.bytes_feature(
        filename.encode('utf8'))
    feature_dict['image/source_id'] = dataset_util.bytes_feature(
        str(image_id).encode('utf8'))
    feature_dict['image/key/sha256'] = dataset_util.bytes_feature(
        sha256_hex.encode('utf8'))
    feature_dict['image/encoded'] = dataset_util.bytes_feature(encoded_jpg)
    feature_dict['image/format'] = dataset_util.bytes_feature(
        'jpeg'.encode('utf8'))
    feature_dict['image/not_exhaustive_category_ids'] = (
        dataset_util.int64_list_feature(not_exhaustive_ids))
    feature_dict['image/image_neg_category_ids'] = (
        dataset_util.int64_list_feature(neg_ids))

    if bbox_annotations:
        lefts, rights, tops, bottoms = [], [], [], []
        crowd_flags = []
        names = []
        ids = []
        areas = []
        mask_pngs = []
        for ann in bbox_annotations:
            (x, y, box_w, box_h) = tuple(ann['bbox'])
            left = max(float(x) / image_width, 0.0)
            right = min(float(x + box_w) / image_width, 1.0)
            top = max(float(y) / image_height, 0.0)
            bottom = min(float(y + box_h) / image_height, 1.0)
            # Drop boxes that collapse after clipping.
            if right <= left or bottom <= top:
                continue

            lefts.append(left)
            rights.append(right)
            tops.append(top)
            bottoms.append(bottom)
            crowd_flags.append(0)
            cat_id = int(ann['category_id'])
            ids.append(cat_id)
            names.append(category_index[cat_id]['name'].encode('utf8'))
            areas.append(ann['area'])

            if include_mask:
                rle = mask.frPyObjects(ann['segmentation'], image_height,
                                       image_width)
                # Merge the decoded layers along the last axis into one mask.
                merged = np.amax(mask.decode(rle), axis=2)
                png_buffer = io.BytesIO()
                PIL.Image.fromarray(merged).save(png_buffer, format='PNG')
                mask_pngs.append(png_buffer.getvalue())

        feature_dict['image/object/bbox/xmin'] = (
            dataset_util.float_list_feature(lefts))
        feature_dict['image/object/bbox/xmax'] = (
            dataset_util.float_list_feature(rights))
        feature_dict['image/object/bbox/ymin'] = (
            dataset_util.float_list_feature(tops))
        feature_dict['image/object/bbox/ymax'] = (
            dataset_util.float_list_feature(bottoms))
        feature_dict['image/object/class/text'] = (
            dataset_util.bytes_list_feature(names))
        feature_dict['image/object/class/label'] = (
            dataset_util.int64_list_feature(ids))
        feature_dict['image/object/is_crowd'] = (
            dataset_util.int64_list_feature(crowd_flags))
        feature_dict['image/object/area'] = (
            dataset_util.float_list_feature(areas))
        if include_mask:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(mask_pngs))

    features = tf.train.Features(feature=feature_dict)
    return True, filename, tf.train.Example(features=features)
def create_tf_example(
        json_path="/Data2TB/chl_data/rgb/train/augmented/train_pos_neg.json",
        image_dir="/Data2TB/chl_data/rgb/train/augmented/pos_neg_png/",
        output_path="/Data2TB/chl_data/rgb/train/augmented/train.record"):
    """Write a TFRecord with the 'Head' boxes of every frame of a JSON file.

    The JSON document must hold a ``frames`` list; each frame provides
    ``height``, ``width``, ``file`` and ``annotations``. Annotations whose
    ``label`` is ``'Head'`` are converted from pixel ``x, y, width, height``
    to corner coordinates normalized by the frame size and capped at 1.0.
    Frames without such annotations are still written, with empty box lists.

    Args:
        json_path: Annotation file to read.
        image_dir: Directory that contains the files named by ``file``.
        output_path: TFRecord file to create.
    """
    writer = tf.python_io.TFRecordWriter(output_path)
    try:
        with open(json_path) as f:
            jsondata = json.load(f)

        for i, frame in enumerate(jsondata['frames']):
            height = frame["height"]
            width = frame["width"]
            filename_only = frame['file']
            print(str(i) + ": " + filename_only)

            filename = os.path.join(image_dir, filename_only)
            with tf.gfile.GFile(filename, 'rb') as fid:
                encoded_jpg = fid.read()

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes = []  # integer class id of each box (1 per box)
            for annotation in frame['annotations']:
                if annotation['label'] != 'Head':
                    continue
                left = annotation['x'] / width
                right = (annotation['x'] + annotation['width']) / width
                top = annotation['y'] / height
                bottom = (annotation['y'] + annotation['height']) / height
                # Boxes may reach past the frame; cap at the image border.
                xmins.append(min(left, 1.0))
                xmaxs.append(min(right, 1.0))
                ymins.append(min(top, 1.0))
                ymaxs.append(min(bottom, 1.0))
                classes.append(1)

            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/filename':
                        dataset_util.bytes_feature(str.encode(filename)),
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymaxs),
                    'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                }))
            writer.write(tf_example.SerializeToString())
    finally:
        # Close even on failure so the records written so far are flushed.
        writer.close()