Exemplo n.º 1
0
def create_tf_example(f, image_path=None):
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    filename = f.readline().rstrip()
    filepath = os.path.join(image_path, filename)
    image_raw = cv2.imread(filepath)

    encoded_image_data = open(filepath, 'rb').read()
    key = hashlib.sha256(encoded_image_data).hexdigest()

    height, width, channel = image_raw.shape

    face_num = int(f.readline().rstrip())
    valid_face_num = 0

    for i in range(face_num):
        annot = f.readline().rstrip().split()
        # WIDER FACE DATASET CONTAINS SOME ANNOTATIONS WHAT EXCEEDS THE IMAGE BOUNDARY
        if (float(annot[2]) > 25.0):
            if (float(annot[3]) > 30.0):
                xmins.append(max(0.005, (float(annot[0]) / width)))
                ymins.append(max(0.005, (float(annot[1]) / height)))
                xmaxs.append(
                    min(0.995, ((float(annot[0]) + float(annot[2])) / width)))
                ymaxs.append(
                    min(0.995, ((float(annot[1]) + float(annot[3])) / height)))
                classes_text.append('face'.encode('utf8'))
                classes.append(1)
                print(xmins[-1], ymins[-1], xmaxs[-1], ymaxs[-1],
                      classes_text[-1], classes[-1])
                valid_face_num += 1

    print("Face Number is %d" % face_num)
    print("Valid face number is %d" % valid_face_num)

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        # 'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        # 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    tf_example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))

    return valid_face_num, tf_example
Exemplo n.º 2
0
def create_tf(input_path,filename,bb_list):
    height = size # Image height
    width = size # Image width
    encoded_image_data = tf.gfile.GFile(input_path+filename).read() # Encoded image bytes
    image_format = b'jpeg' # b'jpeg' or b'png'
    xmins = [i[0]/float(size) for i in bb_list] # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [i[1]/float(size) for i in bb_list] # List of normalized right x coordinates in bounding box (1 per box)
    ymins = [i[2]/float(size) for i in bb_list] # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [i[3]/float(size) for i in bb_list] # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = ['ship'] * len(bb_list) # List of string class name of bounding box (1 per box)
    classes = [1] * len(bb_list) # List of integer class id of bounding box (1 per box)
    tf_image = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_image
def create_tf_record(image_file, height, width, xmins, ymins, xmaxs, ymaxs, classes_id, classes_text):
  '''
  param xmins: List of normalized left x coordinates in bounding box (1 per box)
  param xmaxs: List of normalized right x coordinates in bounding box (1 per box)
  param ymins: List of normalized top y coordinates in bounding box (1 per box)
  param ymaxs: List of normalized bottom y coordinates in bounding box (1 per box)
  param classes_text: List of string class name of bounding box (1 per box)
  param classes: List of integer class id of bounding box (1 per box)
  '''
  image_data = tf.gfile.FastGFile(image_file, 'rb').read()
  encoded_image_data = tf.compat.as_bytes(image_data) # Encoded image bytes
  filename = tf.compat.as_bytes(image_file)
  image_format = 'png'.encode('utf8') # b'jpeg' or b'png'

  tf_single_dataset = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(os.path.basename(filename)),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes_id),
  }))
  return tf_single_dataset
def create_tf_example(filename, dirpath, boxes):

    #Read in the image
    with tf.gfile.GFile(os.path.join(dirpath, filename), 'rb') as fid:
        encoded_image_data = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_image_data)
    image = Image.open(encoded_jpg_io)

    width, height = image.size
    _, ext = os.path.splitext(filename)
    image_format = ext[1:].encode('UTF-8')  # b'jpeg' or b'png'

    xmins = [
        x / width for x in boxes['xmins']
    ]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [
        x / width for x in boxes['xmaxs']
    ]  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = [
        y / height for y in boxes['ymins']
    ]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [
        y / height for y in boxes['ymaxs']
    ]  # List of normalized bottom y coordinates in bounding box (1 per box)

    #DEFAULT LABELS AND CLASSES FOR IVF
    classes_text = [b'sperm'] * len(
        xmins)  # List of string class name of bounding box (1 per box)
    classes = [1] * len(
        xmins)  # List of integer class id of bounding box (1 per box)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('UTF-8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('UTF-8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Exemplo n.º 5
0
def create_tf_example(group, path, labels_mapping):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        class_idx = labels_mapping.get(row['class'], None)
        if class_idx is None:
            continue

        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)

        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_idx)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Exemplo n.º 6
0
def create_tf_example(group, img_path):

    with tf.gfile.GFile(os.path.join(img_path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    width, height = image.size
    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['label'].encode('utf8'))
        classes.append(class_text_to_int(row['label']))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Exemplo n.º 7
0
def create_tf_example(image_file):
    print("opening {}".format(image_file))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = os.path.basename(image_file).encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    xml_file = image_file + ".xml"
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for member in root.findall('object'):
        classes_text.append(member[0].text.encode('utf-8'))
        classes.append(class_text_to_int(member[0].text))
        xmins.append(int(member[5][0].text) / width)
        xmaxs.append(int(member[5][1].text) / width)
        ymins.append(int(member[5][2].text) / height)
        ymaxs.append(int(member[5][3].text) / height)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Exemplo n.º 8
0
def create_tf_example(image_data):
    height = image_data.height  # Image height
    width = image_data.width  # Image width

    with tf.gfile.GFile(image_data.path, 'rb') as fid:
        encoded_png = fid.read()

    filename = image_data.id.encode('utf8')

    image_format = b'png'  # b'jpeg' or b'png'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for bbox in image_data.bboxes:
        xmins.append(bbox.min_x / width)
        xmaxs.append(bbox.max_x / width)
        ymins.append(bbox.min_y / height)
        ymaxs.append(bbox.max_y / height)
        classes_text.append(bbox.class_label.encode('utf8'))
        classes.append(bbox.class_id)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_png),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Exemplo n.º 9
0
def create_tf_example(example, height, width):
    
    ## Udacity's real-life data set
    ## height = 1096 # Image height
    ## width = 1368 # Image width
    
    ## Udacity's simulator data set
    ## height = 600
    ## width = 800

    filename = example['filename'] # Filename of the image. Empty if image is not from file
    filename = filename.encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode() 

    xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [] # List of normalized right x coordinates in bounding box
                # (1 per box)
    ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [] # List of normalized bottom y coordinates in bounding box
                # (1 per box)
    classes_text = [] # List of string class name of bounding box (1 per box)
    classes = [] # List of integer class id of bounding box (1 per box)

    for box in example['annotations']:
        #print("adding box")
        xmins.append(float(box['xmin'] / width))
        xmaxs.append(float((box['xmin'] + box['x_width']) / width))
        ymins.append(float(box['ymin'] / height))
        ymaxs.append(float((box['ymin']+ box['y_height']) / height))
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))


    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example
Exemplo n.º 10
0
def __create_tf_example(frame_data, sorted_label_list):
    im = PIL.Image.open(io.BytesIO(frame_data.image))
    arr = io.BytesIO()
    if frame_data.format == 'jpg':
        format = 'JPEG'
    else:
        format = frame_data.format.upper()
    im.save(arr, format=format)
    height = im.height
    width = im.width
    encoded_image_data = arr.getvalue()
    rects, labels = bbox_writer.convert_text_to_rects_and_labels(
        frame_data.bboxes_text)
    # List of normalized coordinates, 1 per box, capped to [0, 1]
    xmins = [max(min(rect[0] / width, 1), 0) for rect in rects]  # left x
    xmaxs = [max(min(rect[2] / width, 1), 0) for rect in rects]  # right x
    ymins = [max(min(rect[1] / height, 1), 0) for rect in rects]  # top y
    ymaxs = [max(min(rect[3] / height, 1), 0) for rect in rects]  # bottom y

    classes_txt = [label.encode('utf-8') for label in labels]  # String names
    label_to_id_dict = {label: i for i, label in enumerate(sorted_label_list)}
    class_ids = [label_to_id_dict[label] for label in labels]

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(frame_data.filename.encode('utf-8')),
            'image/source_id':
            dataset_util.bytes_feature(frame_data.filename.encode('utf-8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(frame_data.format.encode('utf-8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_txt),
            'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
        }))
    label_counter_for_frame = collections.Counter(labels)
    is_negative = len(rects) == 0
    return tf_example, label_counter_for_frame, is_negative
Exemplo n.º 11
0
def group_to_tf_record(point, image_directory):
    format_png = b'png'
    format_jpg = b'jpeg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    class_nums = []
    class_ids = []
    # changed point[0] to point as is just one point
    image_id = point['id']

    if image_id.startswith('frame'):
        filename = os.path.join(image_directory, image_id + '.png')
        format = format_png
    else:
        filename = os.path.join(image_directory, image_id + '.jpg') #.decode()
        format = format_jpg

    try:
        image = Image.open(filename)
        width, height = image.size
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_image = bytes(fid.read())
    except:
        return None
    key = hashlib.sha256(encoded_image).hexdigest()
    for anno in point['annotations']:
        xmins.append(float(anno['x0']))
        xmaxs.append(float(anno['x1']))
        ymins.append(float(anno['y0']))
        ymaxs.append(float(anno['y1']))
        class_nums.append(anno['class_num'])
        class_ids.append(bytes(anno['label'].encode('utf8')))
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/filename': dataset_util.bytes_feature(bytes(filename.encode('utf8'))),
        'image/source_id': dataset_util.bytes_feature(bytes(image_id.encode('utf8'))),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(class_ids),
        'image/object/class/label': dataset_util.int64_list_feature(class_nums)
    }))
    return tf_example
Exemplo n.º 12
0
def create_tf_example(filename):
    coordinates = filename.rsplit('/', 1)[-1].rsplit('.', 1)[0].split('-')[2]
    leftUp, rightDown = [[int(eel) for eel in el.split('&')]
                         for el in coordinates.split('_')]
    xmin, ymin = leftUp
    xmax, ymax = rightDown

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    height = image.height
    width = image.width
    key = hashlib.sha256(encoded_jpg).hexdigest()

    ymins = [float(ymin) / height]
    xmins = [float(xmin) / width]
    ymaxs = [float(ymax) / height]
    xmaxs = [float(xmax) / width]

    labels_text = ['vehicle plate'.encode('utf8')]
    labels = [2]

    # print("---------image size:",image.size)
    # print("---------xmin:{}, ymin:{}, xmax:{}, ymax:{}".format(xmin,ymin,xmax,ymax))
    # print("---------width:{}, height:{}".format(width,height))

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        # 'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        # 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(labels_text),
        'image/object/class/label': dataset_util.int64_list_feature(labels),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def create_tfdatapoint(file_loc, file, labels):
    img = Image.open(os.path.join(file_loc, 'images', file))
    (width, height) = img.size
    encoded = tf.io.gfile.GFile(os.path.join(file_loc, 'images', file),
                                "rb").read()
    encoded = bytes(encoded)
    image_format = b'png'
    filename = file.split('.')[0]
    data = np.genfromtxt(os.path.join(file_loc, 'labels', filename + '.txt'))
    data = data.reshape(int(data.size / 5), 5)

    classes = [int(x) for x in data[:, 0]]
    classes_text = [labels[x].encode('utf8') for x in classes]
    xmins = data[:, 1] - (data[:, 3] / 2.0)
    xmaxs = data[:, 1] + (data[:, 3] / 2.0)
    ymins = data[:, 2] - (data[:, 4] / 2.0)
    ymaxs = data[:, 2] + (data[:, 4] / 2.0)

    tf_label_and_data = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(str.encode(filename)),
            'image/source_id':
            dataset_util.bytes_feature(str.encode(filename)),
            'image/encoded':
            dataset_util.bytes_feature(encoded),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_label_and_data
def create_tf_example(group, path):
    # Opening and readinf the files
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    # Encode the image in jpeg format to array values
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    # Setting up the image size
    width, height = image.size

    #Creating the boundary box coordinate instances such as xmin,ymin,xmax,ymax
    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'])
        xmaxs.append(row['xmax'])
        ymins.append(row['ymin'])
        ymaxs.append(row['ymax'])
        classes.append(row['class'].encode('utf8'))

    # This is already exisiting code to convert csv to tfrecord
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/label': dataset_util.bytes_list_feature(
                classes),
        }))
    return tf_example
Exemplo n.º 15
0
def dict_to_coco_example(img_data):
    """Convert python dictionary formath data of one image to tf.Example proto.
    Args:
        img_data: infomation of one image, inclue bounding box, labels of bounding box,\
            height, width, encoded pixel data.
    Returns:
        example: The converted tf.Example
    """
    bboxes = img_data['bboxes']
    xmin, xmax, ymin, ymax = [], [], [], []
    for bbox in bboxes:
        xmin.append(bbox[2])
        xmax.append(bbox[0])
        ymin.append(bbox[3])
        ymax.append(bbox[1])
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(img_data['height']),
            'image/width':
            dataset_util.int64_feature(img_data['width']),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/label':
            dataset_util.int64_list_feature(img_data['labels']),
            'image/object/class/text':
            dataset_util.bytes_list_feature(img_data['text']),
            'image/encoded':
            dataset_util.bytes_feature(img_data['pixel_data']),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf-8')),
            'image/object/class/file':
            dataset_util.bytes_feature(img_data['file'].encode('utf-8')),
        }))
    return example
def create_tf_example(group, path):
    base = os.path.splitext(group.filename)[0]
    with tf.gfile.GFile(os.path.join(path, base + ".jpg"), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(int_to_class_text(row['classID']).encode('utf8'))
        classes.append(row['classID']+1)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def dict_to_coco_example(img_data):
    """Convert python dictionary formath data of one image to tf.Example proto.
    Args:
        img_data: infomation of one image, inclue bounding box, labels of bounding box,\
            height, width, encoded pixel data.
    Returns:
        example: The converted tf.Example
    """
    # bboxes = img_data['bboxes']
    # xmin, xmax, ymin, ymax = [], [], [], []
    # for bbox in bboxes:
    #     xmin.append(bbox[0])
    #     xmax.append(bbox[0] + bbox[2])
    #     ymin.append(bbox[1])
    #     ymax.append(bbox[1] + bbox[3])

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/id': dataset_util.int64_feature(img_data['id']),
        'image/caption': dataset_util.bytes_list_feature(img_data['caption']),
        'image/encoded': dataset_util.bytes_feature(img_data['pixel_data'])
    }))
    return example
Exemplo n.º 18
0
def create_tf_example(height, width, filename, encoded_image_data,
                      image_format, xmins, xmaxs, ymins, ymaxs, classes_text,
                      classes):
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),  # Image height
            'image/width':
            dataset_util.int64_feature(width),  # Image width
            'image/filename':
            dataset_util.bytes_feature(filename),  # Filename of the image
            'image/source_id':
            dataset_util.bytes_feature(filename),  # Filename of the image
            'image/encoded':
            dataset_util.bytes_feature(
                encoded_image_data),  # Encoded image bytes
            'image/format':
            dataset_util.bytes_feature(image_format),  # b'jpeg' or b'png'
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(
                xmins),  # normalized left x coordinate in bounding box
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(
                xmaxs),  # normalized right x coordinate in bounding box
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(
                ymins),  # normalized top y coordinate in bounding box
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(
                ymaxs),  # normalized bottom y coordinate in bounding box
            'image/object/class/text':
            dataset_util.bytes_list_feature(
                classes_text),  # string class name of bounding box
            'image/object/class/label':
            dataset_util.int64_list_feature(
                classes),  # integer class id of bounding box
        }))
    return tf_example
Exemplo n.º 19
0
def create_tf_example(csv, img_dir):
    img_fname = csv[0]
    x1, y1, x2, y2 = list(map(int, csv[1:-1]))
    cls_idx = int(csv[-1])
    cls_text = config.CLASS_NAMES[cls_idx].encode('utf8')
    with tf.gfile.GFile(os.path.join(img_dir, img_fname), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    xmin = [x1 / width]
    xmax = [x2 / width]
    ymin = [y1 / height]
    ymax = [y2 / height]
    cls_text = [cls_text]
    cls_idx = [cls_idx]

    filename = img_fname.encode('utf8')
    image_format = b'jpg'

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(cls_text),
        'image/object/class/label': dataset_util.int64_list_feature(cls_idx),
    }))

    return tf_example
Exemplo n.º 20
0
    def createTFExample(self):
        """Convert XML derived dict to tf.Example proto.
        Notice that this function normalizes the bounding box coordinates provided
        by the raw data.
        Args: None
        Returns:
            example: The converted tf.Example.
        """
        with tf.io.gfile.GFile(self.xml, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        # the image might be processed in a different location
        # so overwrite the path to the input image path for consistency
        data['path'] = self.jpg if self.crop == '' else self.__cropImage(data)

        print(f"Processing image {data['path']}")

        width = int(data['size']['width'])
        height = int(data['size']['height'])
        filename = data['filename'].encode('utf8')
        with tf.io.gfile.GFile(data['path'], 'rb') as fid:
            encoded_image_data = fid.read()
        image_format = 'jpeg'.encode('utf8')

        # List of normalized left x coordinates in bounding box (1 per box)
        xmins = []
        # List of normalized right x coordinates in bounding box (1 per box)
        xmaxs = []
        # List of normalized top y coordinates in bounding box (1 per box)
        ymins = []
        # List of normalized bottom y coordinates in bounding box (1 per box)
        ymaxs = []
        # List of string class name of bounding box (1 per box)
        classes_text = []
        classes_id = []  # List of integer class id of bounding box (1 per box)

        image = util.loadImage(data['path'])

        for obj in data['object']:
            if obj['name'] not in classes or not self.__isValidBox(
                    obj, width, height):
                print('Unexpected object: ' + str(obj) + ' in ' + data['path'])
                continue
            xmins.append(float(obj['bndbox']['xmin']) / width)
            ymins.append(float(obj['bndbox']['ymin']) / height)
            xmaxs.append(float(obj['bndbox']['xmax']) / width)
            ymaxs.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes_id.append(getClassID(obj['name']))
            util.drawBox(image, self.__encodeBox(obj['bndbox']))

        util.saveImage(image,
                       str(data['path']).replace(".jpg", "-with-boxes.jpg"))

        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(filename),
                'image/source_id':
                dataset_util.bytes_feature(filename),
                'image/encoded':
                dataset_util.bytes_feature(encoded_image_data),
                'image/format':
                dataset_util.bytes_feature(image_format),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes_id),
            }))
        return tf_example
Exemplo n.º 21
0
def read_xml_make_tfrecord():
    num_data = 8
    for i in range(num_data):
        globals()['train_writer_{:05d}-of-{:05d}'.format(
            int(i), int(num_data))] = tensorflow.io.TFRecordWriter(
                'tfrecord/train/train.tfrecord-{:05d}-of-{:05d}'.format(
                    int(i), int(num_data)))

    for i in range(int(num_data / 8)):
        globals()['test_writer_{:05d}-of-{:05d}'.format(
            int(i), int(num_data / 8))] = tensorflow.io.TFRecordWriter(
                'tfrecord/test/test.tfrecord-{:05d}-of-{:05d}'.format(
                    int(i), int(num_data / 8)))
        globals()['valid_writer_{:05d}-of-{:05d}'.format(
            int(i), int(num_data / 8))] = tensorflow.io.TFRecordWriter(
                'tfrecord/valid/valid.tfrecord-{:05d}-of-{:05d}'.format(
                    int(i), int(num_data / 8)))

    length = len(os.listdir(folder))

    for number, img_name in enumerate(os.listdir(folder)):
        if img_name[-4:] != '.jpg': continue
        filename = img_name[:-4]
        img = cv2.imread(folder + filename + ".jpg")
        height, width = img.shape[:2]

        mask = cv2.imread('mask/' + filename + '.jpg', 0)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((5, 5),
                                                               np.uint8))
        cv2.imshow("asdas", mask)
        cv2.waitKey()
        _, contours, _ = cv2.findContours(mask, cv2.RETR_TREE,
                                          cv2.CHAIN_APPROX_SIMPLE)
        print(contours)
        contours = sorted(contours, key=lambda x: len(x), reverse=True)

        x = [temp[0][0] for temp in contours[0]]
        y = [temp[0][1] for temp in contours[0]]
        xmin = min(x)
        xmax = max(x)
        ymin = min(y)
        ymax = max(y)
        # cv2.circle(img,(xmin,ymin),5,(255,0,0),5)
        # cv2.circle(img, (xmax, ymax), 5, (255, 0, 0), 5)
        # cv2.imshow("asd",img)
        # cv2.waitKey()
        object_name = 'passport'
        pixel_val = 255
        with tensorflow.io.gfile.GFile(folder + filename + ".jpg",
                                       'rb') as fid:
            encoded_image_data = fid.read()
        key = hashlib.sha256(encoded_image_data).hexdigest()

        with tensorflow.io.gfile.GFile('mask/' + filename + ".jpg",
                                       'rb') as fid:
            encoded_mask_data = fid.read()

        encoded_mask = io.BytesIO(encoded_mask_data)
        mask = Image.open(encoded_mask)
        mask_np = np.asarray(mask.convert('L'))
        mask_remapped = (mask_np == pixel_val).astype(np.uint8)
        # print("mask",mask_remapped.shape)
        # cv2.imshow("asd",mask_remapped*255)
        # cv2.waitKey()
        mask_img = Image.fromarray(mask_remapped)
        output = io.BytesIO()
        mask_img.save(output, format='PNG')

        xmins = [xmin / width]
        xmaxs = [xmax / width]
        ymins = [ymin / height]
        ymaxs = [ymax / height]
        classes_text = [object_name.encode('utf8')]
        classes = [1]
        masks = [output.getvalue()]

        print(img_name)
        print(xmins)
        print(xmaxs)
        print(ymins)
        print(ymaxs)
        print(classes_text)
        print(classes)
        print(masks)
        example = tensorflow.train.Example(features=tensorflow.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(img_name.encode('utf8')),
                'image/source_id':
                dataset_util.bytes_feature(img_name.encode('utf8')),
                'image/key/sha256':
                dataset_util.bytes_feature(key.encode('utf8')),
                'image/encoded':
                dataset_util.bytes_feature(encoded_image_data),
                'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
                'image/object/mask':
                dataset_util.bytes_list_feature(masks),
            }))
        if number < length * 0.8:
            globals()['train_writer_{:05d}-of-{:05d}'.format(
                int(number / (length * 0.8) * num_data),
                int(num_data))].write(example.SerializeToString())

        elif number < length * 0.9:
            globals()['valid_writer_{:05d}-of-{:05d}'.format(
                int((number - length * 0.8) / (length * 0.1) * num_data / 8),
                int(num_data / 8))].write(example.SerializeToString())
        elif number < length:

            globals()['test_writer_{:05d}-of-{:05d}'.format(
                int((number - length * 0.9) / (length * 0.1) * num_data / 8),
                int(num_data / 8))].write(example.SerializeToString())
Exemplo n.º 22
0
def xml_to_tf(path_input, path_output):
    xml_list = []
    column_name = [
        'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'
    ]

    print(path_output)
    writer = tf.io.TFRecordWriter(path_output)

    files = os.listdir(path_input)
    for file in files:
        if file.endswith(".xml"):
            xmlFile = path_input + file

            tree = ET.parse(xmlFile)
            root = tree.getroot()

            filename = root[1].text
            width = int(root[4][0].text)
            height = int(root[4][1].text)

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []
            classes = []

            for member in root.findall('object'):
                beer = member[0].text
                xmin = int(member[4][0].text)
                ymin = int(member[4][1].text)
                xmax = int(member[4][2].text)
                ymax = int(member[4][3].text)

                xmins.append(xmin / width)
                xmaxs.append(xmax / width)
                ymins.append(ymin / height)
                ymaxs.append(ymax / height)
                classes_text.append(beer.encode('utf8'))
                classes.append(class_text_to_int(beer))

            with tf.io.gfile.GFile(
                    os.path.join(path_input, '{}'.format(filename)),
                    'rb') as fid:
                encoded_jpg = fid.read()
                print(encoded_jpg)

            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height':
                    dataset_util.int64_feature(height),
                    'image/width':
                    dataset_util.int64_feature(width),
                    'image/filename':
                    dataset_util.bytes_feature(filename.encode('utf8')),
                    'image/source_id':
                    dataset_util.bytes_feature(filename.encode('utf8')),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_jpg),
                    'image/format':
                    dataset_util.bytes_feature(IMAGE_FORMAT),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text':
                    dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
                }))

            writer.write(tf_example.SerializeToString())
    writer.close()
    output_path = os.path.join(os.getcwd(), path_output)
    print('Successfully created the TFRecords: {}'.format(output_path))
Exemplo n.º 23
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list:
        list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
        u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner.  This function converts
        to the format expected by the Tensorflow Object Detection API (which is
        which is [ymin, xmin, ymax, xmax] with coordinates normalized relative
        to image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed
        by the 'id' field of each category.  See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
    Returns:
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
Exemplo n.º 24
0
def create_tf_example(filename, label_file):
    img = cv2.imread(filename)
    height, width, channels = img.shape

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image_data = fid.read()

    image_format = b'jpg'

    xmins = [
    ]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box
    # (1 per box)
    ymins = [
    ]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box
    # (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    with open(label_file, 'r') as f:
        csvreader = csv.reader(f, delimiter=' ')
        head = True

        for row in csvreader:
            if head:
                head = False
                continue

            name = row[-1]
            classes_text.append(name)
            classes.append(get_index(name))

            xmins.append(float(row[0]) / width)
            xmaxs.append(float(row[2]) / width)
            ymins.append(float(row[1]) / height)
            ymaxs.append(float(row[3]) / height)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
Exemplo n.º 25
0
    def _create_tf_entry(self, categories, img, label, filename, annotations):
        imageFormat = b'jpg'

        width, height = img.size

        imgByteArr = io.BytesIO()
        img.save(imgByteArr, format='JPEG')
        encodedImageData = imgByteArr.getvalue()

        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []

        for annotation in annotations:
            rect = None
            if type(
                    annotation.data
            ) is Rectangle:  #currently we only support Rect annotations, TODO: change me
                rect = annotation.data
            elif type(annotation.data) is Polygon:
                rect = annotation.data.rect

            if rect is not None:
                trimmed_rect = rect.trim(
                    Rectangle(0, 0, width, height)
                )  #scale to image dimension in case annotation exceeds image width/height

                if trimmed_rect.left < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect left dimension invalid! (<0)")
                if trimmed_rect.top < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect top dimension invalid! (<0)")
                if trimmed_rect.width < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect width dimension invalid! (<0)")
                if trimmed_rect.height < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect height dimension invalid! (<0)")

                if (trimmed_rect.left + trimmed_rect.width) > width:
                    raise ImageMonkeyGeneralError(
                        "bounding box width > image width!")
                if (trimmed_rect.top + trimmed_rect.height) > height:
                    raise ImageMonkeyGeneralError(
                        "bounding box height > image height!")

                xmin = trimmed_rect.left / float(width)
                xmax = (trimmed_rect.left + trimmed_rect.width) / float(width)
                ymin = trimmed_rect.top / float(height)
                ymax = (trimmed_rect.top + trimmed_rect.height) / float(height)

                #sanity checks
                if xmin > xmax:
                    raise ImageMonkeyGeneralError("xmin > xmax!")

                if ymin > ymax:
                    raise ImageMonkeyGeneralError("ymin > ymax!")

                if (xmin == 0) and (xmax == 0) and (ymin == 0) and (ymax == 0):
                    continue  #skip bounding boxes that are 0

                xmins.append(xmin)
                xmaxs.append(xmax)
                ymins.append(ymin)
                ymaxs.append(ymax)

        #we might have some images in our dataset, which don't have a annotation, skip those
        if ((len(xmins) == 0) or (len(xmaxs) == 0) or (len(ymins) == 0)
                or (len(ymaxs) == 0)):
            return None

        classes = [(categories.index(label) + 1)] * len(
            xmins)  #class indexes start with 1
        labels = [label.encode('utf8')] * len(xmins)

        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(filename.encode()),
                'image/source_id':
                dataset_util.bytes_feature(filename.encode()),
                'image/encoded':
                dataset_util.bytes_feature(encodedImageData),
                'image/format':
                dataset_util.bytes_feature(imageFormat),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(labels),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            }))
        return tf_example
Exemplo n.º 26
0
def dict_to_tf_example(data,
                       image_path,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='images'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      image_path: Full path to image file
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    # img_path = os.path.join(
    #     data['folder'], image_subdirectory, data['filename'])
    # full_path = os.path.join(dataset_directory, img_path)
    full_path = image_path
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    filename = full_path.split('/')[-1]

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'object' in data:
        for obj in data['object']:
            difficult = False  # bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            # truncated.append(int(obj['truncated']))
            truncated.append(0)
            # poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Exemplo n.º 27
0
def background_tf_example(image_path, ):
    """
    Args:
      image_path: Full path to image file

    Returns:
      example: The converted tf.Example.
    """

    full_path = image_path
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    filename = full_path.split('/')[-1]
    width = image.width
    height = image.height

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Exemplo n.º 28
0
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id', u'not_exhaustive_category_ids',
      u'neg_category_ids']
    image_dir: directory containing the image files.
    bbox_annotations:
      list of dicts with keys:
      [u'segmentation', u'area', u'image_id', u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official LVIS dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      which is [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    category_index: a dict containing LVIS category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_mask: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  Returns:
    success: whether the conversion is successful
    filename: image filename
    example: The converted tf.Example

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['coco_url']
    filename = osp.join(*filename.split('/')[-2:])

    image_id = image['id']
    image_not_exhaustive_category_ids = image['not_exhaustive_category_ids']
    image_neg_category_ids = image['neg_category_ids']

    full_path = os.path.join(image_dir, filename)
    if not tf.gfile.Exists(full_path):
        tf.logging.warn(f'image {full_path} not exists! skip')
        return False, None, None

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    key = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        'image/height':
        dataset_util.int64_feature(image_height),
        'image/width':
        dataset_util.int64_feature(image_width),
        'image/filename':
        dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/not_exhaustive_category_ids':
        dataset_util.int64_list_feature(image_not_exhaustive_category_ids),
        'image/image_neg_category_ids':
        dataset_util.int64_list_feature(image_neg_category_ids),
    }

    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        encoded_mask_png = []
        for object_annotations in bbox_annotations:
            (x, y, width, height) = tuple(object_annotations['bbox'])

            xmin_single = max(float(x) / image_width, 0.0)
            xmax_single = min(float(x + width) / image_width, 1.0)
            ymin_single = max(float(y) / image_height, 0.0)
            ymax_single = min(float(y + height) / image_height, 1.0)
            if xmax_single <= xmin_single or ymax_single <= ymin_single:
                continue
            xmin.append(xmin_single)
            xmax.append(xmax_single)
            ymin.append(ymin_single)
            ymax.append(ymax_single)

            is_crowd.append(0)
            category_id = int(object_annotations['category_id'])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]['name'].encode('utf8'))
            area.append(object_annotations['area'])

            if include_mask:
                run_len_encoding = mask.frPyObjects(
                    object_annotations['segmentation'], image_height,
                    image_width)
                binary_mask = mask.decode(run_len_encoding)
                binary_mask = np.amax(binary_mask, axis=2)
                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format='PNG')
                encoded_mask_png.append(output_io.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
            'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
            'image/object/area':
            dataset_util.float_list_feature(area),
        })
        if include_mask:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return True, filename, example
Exemplo n.º 29
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):

    # image_height = image[2]
    # image_width = image[1]
    # filename = image[0]# TODO(user): Populate the following variables from your example.
    # print(image)
    height = image['height']  # Image height
    width = image['width']  # Image width
    filename = image[
        'filename']  # Filename of the image. Empty if image is not from file

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_image_io = io.BytesIO(encoded_jpg)  # Encoded image bytes
    image = PIL.Image.open(encoded_image_io)
    only_file_name, image_format = os.path.splitext(filename)

    xmins = [
    ]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box
    # (1 per box)
    ymins = [
    ]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box
    # (1 per bo)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)
    # print(len(annotations_list))
    for annotation in annotations_list:
        # print(annotation)
        xmins.append(annotation['xmin'] / width)
        xmaxs.append(annotation['xmax'] / width)
        ymins.append(annotation['ymin'] / height)
        ymaxs.append(annotation['ymax'] / height)
        classes_text.append(annotation['label_text'].encode('utf8'))
        classes.append(annotation['label'])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Exemplo n.º 30
0
def json_to_record(j):
    assert (len(j["image_size"]) == 1)
    assert (len(j["categories"]) == len(j["annotations"]))

    image_size = j["image_size"][0]
    height = image_size["height"]
    width = image_size["width"]

    filename = os.path.basename(j["file"])

    # actual image bytes? refer to dataset_tools/create_pet_tf_record.py
    with tf.gfile.GFile(j["file"], "rb") as fid:
        encoded_jpg = fid.read()
        pass
    encoded_image_data = encoded_jpg
    image_format = b'jpeg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []

    classes_text = []
    classes = []

    for annot in j["annotations"]:
        c_name = class_id_to_name(annot["class_id"])
        classes_text.append(c_name.encode("utf8"))
        # class_ids are indexed by 1 for tensorflow
        classes.append(annot["class_id"] + 1)
        corners = get_box_corners(annot)
        xmins.append(corners["xmin"] / width)
        xmaxs.append(corners["xmax"] / width)
        ymins.append(corners["ymin"] / height)
        ymaxs.append(corners["ymax"] / height)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode("utf8")),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode("utf8")),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    # print(tf_example)
    return tf_example
    pass