def create_tf_entry(label_and_data_info):
    """Build a ``tf.train.Example`` for one image and its bounding boxes.

    Args:
        label_and_data_info: Indexable record read positionally as
            ``[filename, xmins, xmaxs, ymins, ymaxs, classes_text, classes]``.
            ``filename`` is decoded and appended to ``IMAGE_FOLDER`` to
            locate the image; the four coordinate entries are iterables of
            values that get divided by the fixed frame size below.

    Returns:
        The populated ``tf.train.Example``.
    """
    # Fixed frame size used for normalisation; the size of the opened image
    # itself is never consulted.
    frame_height = 800
    frame_width = 1360

    def _scale(pixel_values, extent):
        # Divide every coordinate by the frame extent along its axis.
        return [value / extent for value in pixel_values]

    name_bytes = label_and_data_info[0]

    # Re-encode the source image as PNG into an in-memory buffer.
    source_image = Image.open(IMAGE_FOLDER + name_bytes.decode())
    png_buffer = io.BytesIO()
    source_image.save(png_buffer, 'PNG')
    png_bytes = png_buffer.getvalue()
    png_tag = b'png'

    # One entry per bounding box in each of the lists below.
    left_edges = _scale(label_and_data_info[1], frame_width)
    right_edges = _scale(label_and_data_info[2], frame_width)
    top_edges = _scale(label_and_data_info[3], frame_height)
    bottom_edges = _scale(label_and_data_info[4], frame_height)
    label_names = label_and_data_info[5]
    label_ids = label_and_data_info[6]

    feature_map = {
        'image/height': dataset_util.int64_feature(frame_height),
        'image/width': dataset_util.int64_feature(frame_width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(png_bytes),
        'image/format': dataset_util.bytes_feature(png_tag),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(left_edges),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(right_edges),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(top_edges),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(bottom_edges),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def create_tf_example(group, path):
    """Convert one image's grouped annotations into a ``tf.train.Example``.

    Args:
        group: Object exposing ``filename`` (the image file name) and
            ``object`` (something whose ``iterrows()`` yields rows indexable
            by ``'xmin'``, ``'xmax'``, ``'ymin'``, ``'ymax'`` and
            ``'class'``).
        path: Directory that contains the image file.

    Returns:
        The populated ``tf.train.Example``; box coordinates are divided by
        the decoded image's width/height.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()

    # The bytes are decoded only to learn the pixel dimensions; the raw
    # file content is what gets stored in the example.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    name_bytes = group.filename.encode('utf8')
    format_tag = b'jpg'

    # Gather one tuple per annotated box, then transpose into columns.
    per_box = []
    for _, row in group.object.iterrows():
        per_box.append((
            row['xmin'] / width,
            row['xmax'] / width,
            row['ymin'] / height,
            row['ymax'] / height,
            row['class'].encode('utf8'),
            class_text_to_int(row['class']),
        ))

    if per_box:
        columns = [list(column) for column in zip(*per_box)]
    else:
        columns = [[] for _ in range(6)]
    lefts, rights, tops, bottoms, label_names, label_ids = columns

    feature_map = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(format_tag),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def make_tf_record(dataset_list, task_name, task_object, tfrecord_path):
    """Write one ``tf.train.Example`` per dataset entry to a TFRecord file.

    Progress is printed as an integer percentage each time it changes.

    Args:
        dataset_list: Sized iterable of dicts. Each dict is read for
            ``'filename'``, ``'height'``, ``'width'`` and
            ``dataset[task_name]['label']``, an iterable of dicts with
            ``'x_min'``, ``'y_min'``, ``'x_max'`` and ``'y_max'``.
        task_name: Key selecting which task's labels to export.
        task_object: Class name (str) stored for every box.
        tfrecord_path: Destination path of the TFRecord file.
    """
    total = len(dataset_list)
    last_percent = 0

    # The context manager guarantees the writer is closed even when a
    # download or serialisation step raises part-way through.
    with tf.python_io.TFRecordWriter(tfrecord_path) as writer:
        # One tf record feature set per image.
        for done, dataset in enumerate(dataset_list, start=1):
            filename = dataset['filename']
            img = storage.download_img_to_bytes(filename)
            img_format = filename.split('.')[-1]

            labels = dataset[task_name]['label']
            # Coordinates are stored exactly as found in the labels.
            # NOTE(review): presumably already normalised upstream - confirm.
            xmins = [label['x_min'] for label in labels]
            ymins = [label['y_min'] for label in labels]
            xmaxs = [label['x_max'] for label in labels]
            ymaxs = [label['y_max'] for label in labels]

            # Each picture contains only a single kind of object, so every
            # box gets class index 0. Open question from the original
            # author: with several tf record files (one for cars, another
            # for plates) the indices would have to be 0 and 1 - how should
            # they be told apart? It may need to be handled as multiple
            # downloads. With class_name = ['car', 'plate'] the indices
            # would have to be stored as ['0', '1'] for car and plate to be
            # recognised as different objects.
            class_indices = [0 for _ in labels]
            class_names = [task_object.encode('utf-8') for _ in labels]

            feature = tf.train.Features(feature={
                'image/height': dataset_util.int64_feature(dataset['height']),
                'image/width': dataset_util.int64_feature(dataset['width']),
                'image/filename':
                    dataset_util.bytes_feature(filename.encode('utf-8')),
                'image/source_id':
                    dataset_util.bytes_feature(filename.encode('utf-8')),
                'image/encoded': dataset_util.bytes_feature(img),
                'image/format':
                    dataset_util.bytes_feature(img_format.encode('utf-8')),
                'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(class_names),
                'image/object/class/label':
                    dataset_util.int64_list_feature(class_indices),
            })
            sample = tf.train.Example(features=feature)
            writer.write(sample.SerializeToString())

            # Progress check: print only when the whole percent changes.
            percent = done * 100 // total
            if percent != last_percent:
                print(percent)
                last_percent = percent
def xml_to_tf(path_input, path_output):
    """Convert every ``.xml`` annotation in a directory into one TFRecord.

    For each XML file the referenced image is read from the same directory
    and written, together with its boxes, as a ``tf.train.Example``.

    Args:
        path_input: Directory holding the ``.xml`` files and their images.
        path_output: Path of the TFRecord file to create.
    """
    # The context manager closes the writer even if parsing or reading an
    # image fails part-way through.
    with tf.io.TFRecordWriter(path_output) as writer:
        for entry in os.listdir(path_input):
            if not entry.endswith(".xml"):
                continue

            # os.path.join (rather than string concatenation) so that
            # path_input works with or without a trailing separator,
            # matching how the image path is built below.
            root = ET.parse(os.path.join(path_input, entry)).getroot()

            # Elements are addressed by position, not by tag.
            # NOTE(review): assumes child order filename=root[1],
            # size=root[4] (width, height) - confirm against the XML files.
            filename = root[1].text
            width = int(root[4][0].text)
            height = int(root[4][1].text)

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []
            classes = []
            for member in root.findall('object'):
                # NOTE(review): assumes member[0] is the class name and
                # member[4] the box as (xmin, ymin, xmax, ymax) - confirm.
                class_name = member[0].text
                box = member[4]
                xmin = int(box[0].text)
                ymin = int(box[1].text)
                xmax = int(box[2].text)
                ymax = int(box[3].text)
                # Normalise pixel coordinates by the image dimensions.
                xmins.append(xmin / width)
                xmaxs.append(xmax / width)
                ymins.append(ymin / height)
                ymaxs.append(ymax / height)
                classes_text.append(class_name.encode('utf8'))
                classes.append(class_text_to_int(class_name))

            image_path = os.path.join(path_input, '{}'.format(filename))
            with tf.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_jpg = fid.read()

            name_bytes = filename.encode('utf8')
            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(name_bytes),
                    'image/source_id': dataset_util.bytes_feature(name_bytes),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature(IMAGE_FORMAT),
                    'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text':
                        dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                }))
            writer.write(tf_example.SerializeToString())

    output_path = os.path.join(os.getcwd(), path_output)
    print('Successfully created the TFRecords: {}'.format(output_path))
def create_tf_example(path, frame_num):
    """Creates a tf.Example proto for an individual image and its masks.

    The image is read from ``<path>/<frame_num>/image/image_<frame_num>.png``
    and every file in ``<path>/<frame_num>/masks/`` is treated as one
    instance mask. Masks with no non-zero pixel are skipped.

    Args:
        path: path to directory containing all images to be encoded
        frame_num: The frame number of the image to be encoded (str or int).

    Returns:
        example: The created tf.Example.
    """
    # Convert once so both the image path and the masks path accept an
    # integer frame number (previously only the image path was wrapped).
    frame_id = str(frame_num)
    frame_dir = path + '/' + frame_id
    img_path = frame_dir + '/image/image_' + frame_id + '.png'

    # Decoded only to obtain the dimensions; the raw PNG bytes read below
    # are what is stored in the example.
    img = imread(img_path)
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_png = fid.read()

    height = int(img.shape[0])  # Image height
    width = int(img.shape[1])  # Image width

    filename = frame_id  # The frame number doubles as the file name.
    image_format = b'png'

    # One entry per kept mask in each of the lists below.
    xmins = []  # Normalized left x coordinates of the bounding boxes
    xmaxs = []  # Normalized right x coordinates of the bounding boxes
    ymins = []  # Normalized top y coordinates of the bounding boxes
    ymaxs = []  # Normalized bottom y coordinates of the bounding boxes
    classes_text = []  # Class names of the bounding boxes
    classes = []  # Integer class ids of the bounding boxes
    encoded_mask_png_list = []  # PNG-encoded masks

    masks_dir = frame_dir + '/masks/'
    for mask_id in os.listdir(masks_dir):
        with tf.gfile.GFile(masks_dir + mask_id, 'rb') as fid:
            encoded_mask_png = fid.read()
        mask = PIL.Image.open(io.BytesIO(encoded_mask_png))
        # Keep at most the first three channels.
        # NOTE(review): this indexing requires a 3-D array, i.e. a
        # multi-channel mask image - confirm masks are never grayscale.
        mask_np = np.asarray(mask)[:, :, :3]

        indices = np.where(mask_np)
        if indices[0].size == 0:
            # Empty mask: nothing to box, so it is left out entirely.
            continue

        # The bounding box is the extent of the non-zero pixels
        # (axis 0 = rows / y, axis 1 = columns / x).
        xmin = float(np.min(indices[1]))
        xmax = float(np.max(indices[1]))
        ymin = float(np.min(indices[0]))
        ymax = float(np.max(indices[0]))

        xmins.append(xmin / width)
        xmaxs.append(xmax / width)
        ymins.append(ymin / height)
        ymaxs.append(ymax / height)
        classes_text.append('c_elegans'.encode('utf8'))
        classes.append(1)

        # Re-encode the (possibly channel-trimmed) mask as PNG.
        mask_img = PIL.Image.fromarray(mask_np.astype(np.uint8))
        output = io.BytesIO()
        mask_img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename':
                dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
                dataset_util.bytes_feature(filename.encode('utf8')),
            'image/encoded': dataset_util.bytes_feature(encoded_png),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            'image/object/mask':
                dataset_util.bytes_list_feature(encoded_mask_png_list),
        }))
    return tf_example
def create_tf_example(image_path, image_name, height, width, sign_type_num,
                      x_min, y_min, x_max, y_max):
    """Creates a tf.Example proto for one image with a single bounding box.

    Args:
        image_path: Path of the image file whose raw bytes are embedded.
        image_name: File name stored as ``image/filename`` and
            ``image/source_id``.
        height: Value stored as ``image/height``.
        width: Value stored as ``image/width``.
        sign_type_num: Integer class id of the box; must be one of the
            values of the module-level ``mydict``.
        x_min: Left box edge, divided by ``ORIGINAL_WIDTH``.
        y_min: Top box edge, divided by ``ORIGINAL_HEIGHT``.
        x_max: Right box edge, divided by ``ORIGINAL_WIDTH``.
        y_max: Bottom box edge, divided by ``ORIGINAL_HEIGHT``.

    Returns:
        example: The created tf.Example.

    Raises:
        ValueError: If ``sign_type_num`` is not a value in ``mydict``.
    """
    filename = image_name

    # Read through a context manager so the file handle is always closed.
    with tf.gfile.FastGFile(image_path, 'rb') as image_file:
        encoded_image_data = image_file.read()
    image_format = b'jpeg'

    # Coordinates are normalised by the module-level ORIGINAL_* constants,
    # not by the height/width arguments.
    # NOTE(review): presumably the boxes were annotated at the original
    # resolution - confirm.
    xmins = [x_min / ORIGINAL_WIDTH]
    xmaxs = [x_max / ORIGINAL_WIDTH]
    ymins = [y_min / ORIGINAL_HEIGHT]
    ymaxs = [y_max / ORIGINAL_HEIGHT]
    classes = [sign_type_num]

    # Reverse lookup: the first key of mydict whose value is the class id.
    class_names = list(mydict.keys())
    classes_text = class_names[list(mydict.values()).index(sign_type_num)]
    print(classes_text)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename.encode()),
            'image/source_id': dataset_util.bytes_feature(filename.encode()),
            'image/encoded': dataset_util.bytes_feature(encoded_image_data),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature([classes_text.encode()]),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example