def geo_json_bbox(coordinates):
    """ Calculate the bounding box from the GeoJSON coordinates. """
    bbox = None
    for i, coordinate in enumerate(coordinates):
        vec = V2(coordinate[0], coordinate[1])
        if i == 0:
            bbox = BBox(vec, vec)
        else:
            bbox.expand(vec)
    return bbox
def calculate_crop(image, uncropped_bbox, boundary_bbox):
    """ Crop an image with a given geographic bounding box to a new bounding box.
    As we round up the pixel size of the new image, the geographic bounding box of
    the new image will potentially be slightly larger than the required one, so also
    calculate the correct bounding box for the result. """
    pixel_min_x = floor((boundary_bbox.min.x - uncropped_bbox.min.x) / uncropped_bbox.width * image.size[0])
    pixel_min_y = floor((uncropped_bbox.max.y - boundary_bbox.max.y) / uncropped_bbox.height * image.size[1])
    pixel_max_x = ceil((boundary_bbox.max.x - uncropped_bbox.min.x) / uncropped_bbox.width * image.size[0])
    pixel_max_y = ceil((uncropped_bbox.max.y - boundary_bbox.min.y) / uncropped_bbox.height * image.size[1])
    cropped_pixels = (pixel_min_x, pixel_min_y, pixel_max_x, pixel_max_y)
    cropped_bbox = BBox(
        V2(uncropped_bbox.min.x + pixel_min_x / image.size[0] * uncropped_bbox.width,
           uncropped_bbox.max.y - pixel_max_y / image.size[1] * uncropped_bbox.height),
        V2(uncropped_bbox.min.x + pixel_max_x / image.size[0] * uncropped_bbox.width,
           uncropped_bbox.max.y - pixel_min_y / image.size[1] * uncropped_bbox.height))
    return (cropped_pixels, cropped_bbox)
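# A minimal usage sketch (not from the original code) showing how geo_json_bbox
# and calculate_crop could be combined: build a boundary bbox from a GeoJSON
# polygon ring and crop one tile to it. The ring coordinates are made up; the
# tile path and bounds reuse the X21Y09 tile from the run script below, and the
# (min_x, min_y, max_x, max_y) pixel tuple can be passed directly to PIL's
# Image.crop().
def example_crop_to_polygon():
    from PIL import Image
    tile_img = Image.open('./input/X21Y09.png').convert('RGBA')
    tile_bbox = BBox(V2(30, -25), V2(40, -15))
    ring = [[32.0, -22.0], [36.5, -22.0], [36.5, -18.5], [32.0, -18.5], [32.0, -22.0]]
    boundary = geo_json_bbox(ring)
    cropped_pixels, cropped_bbox = calculate_crop(tile_img, tile_bbox, boundary)
    return tile_img.crop(cropped_pixels), cropped_bbox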
def flip(img, bbox, landmark):
    """ Flip the image horizontally, as well as the bbox and landmark.
    param:
        - img, the original image
        - landmark, using absolute coordinates
    """
    # horizontally flip the image
    img_flipped = cv2.flip(img.copy(), 1)
    # horizontally flip the bbox and crop the bbox region
    bbox_flipped = BBox([
        img.shape[1] - bbox.right, img.shape[1] - bbox.left, bbox.top,
        bbox.bottom
    ])
    img_flipped = img_flipped[int(bbox_flipped.top):int(bbox_flipped.bottom) + 1,
                              int(bbox_flipped.left):int(bbox_flipped.right) + 1]
    # horizontally flip the landmark
    landmark_flipped = np.asarray([(img.shape[1] - x, y) for (x, y) in landmark])
    # exchange the right eye and left eye, right mouth corner and left mouth corner
    # however, this may not be needed
    landmark_flipped[[0, 1]] = landmark_flipped[[1, 0]]
    landmark_flipped[[3, 4]] = landmark_flipped[[4, 3]]
    return (img_flipped, bbox_flipped, landmark_flipped)
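# A small sanity-check sketch for flip() (not part of the original code),
# assuming the 5-point landmark order used above: left eye, right eye, nose,
# left mouth corner, right mouth corner. The image, bbox and landmark values
# are made up for illustration; BBox takes [left, right, top, bottom] as in
# the scripts below.
def example_flip_check():
    img = np.zeros((100, 100), dtype=np.uint8)
    bbox = BBox([10, 90, 10, 90])
    landmark = np.array([(30.0, 40.0), (70.0, 40.0), (50.0, 55.0),
                         (35.0, 70.0), (65.0, 70.0)])
    _, _, landmark_flipped = flip(img, bbox, landmark)
    # after mirroring and index swapping, index 0 should still be the left eye
    assert landmark_flipped[0][0] < landmark_flipped[1][0]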
def calculate_composite_bbox_and_dimensions(tiles):
    """ Calculate the geographic bounding box and the required composite image
    dimensions for the given tiles. """
    bbox = None
    for i, tile in enumerate(tiles):
        if i == 0:
            bbox = BBox(tile['bbox'].min, tile['bbox'].max)
        else:
            bbox.expand(tile['bbox'].min)
            bbox.expand(tile['bbox'].max)
    # Use the first tile's bounding box and dimensions - the assumption here is
    # that all the tiles are the same size, but this is not checked.
    # The function should really assert this assumption (a possible check is
    # sketched after this function).
    tile_image_width = tiles[0]['img'].size[0]
    tile_image_height = tiles[0]['img'].size[1]
    width = round(bbox.width / tiles[0]['bbox'].width) * tile_image_width
    height = round(bbox.height / tiles[0]['bbox'].height) * tile_image_height
    return (bbox, (width, height))
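# A possible sketch (not part of the original code) of the assertion suggested
# in the comment above: before compositing, check that every tile has the same
# pixel dimensions and the same geographic bbox size as the first tile.
def assert_uniform_tiles(tiles, eps=1e-9):
    first = tiles[0]
    for tile in tiles[1:]:
        assert tile['img'].size == first['img'].size
        assert abs(tile['bbox'].width - first['bbox'].width) < eps
        assert abs(tile['bbox'].height - first['bbox'].height) < eps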
def test_BBox():
    a = BBox(V2(0, 0), V2(1, 1))
    assert (a.min == V2(0, 0))
    assert (a.max == V2(1, 1))
    a.expand(V2(7, 11))
    assert (a.min == V2(0, 0))
    assert (a.max == V2(7, 11))
    a.expand(V2(-3, -19))
    assert (a.min == V2(-3, -19))
    assert (a.max == V2(7, 11))
    assert (a.width == 10)
    assert (a.height == 30)
    # immutability on copy constructor
    b = BBox(a.min, a.max)
    b.expand(V2(100, 100))
    assert (a.min == V2(-3, -19))
    assert (a.max == V2(7, 11))
    assert (b.min == V2(-3, -19))
    assert (b.max == V2(100, 100))
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # get arguments
    args = parse_args()
    lmdb_data = args.lmdb_data
    lmdb_landmark = args.lmdb_landmark
    num_to_read = args.num_to_read
    save_dir = args.save_dir
    num_landmarks = args.num_landmarks
    # read images and landmarks
    imgs = read_image_from_lmdb(lmdb_data, num_to_read, save_dir)
    landmarks = read_label_from_lmdb(lmdb_landmark, num_to_read)
    # visualization
    bbox = BBox([0, imgs[0].shape[1], 0, imgs[0].shape[0]])
    for i in range(num_to_read):
        draw_landmark_in_cropped_face(
            imgs[i],
            bbox.denormalize_landmarks(landmarks[i].reshape(num_landmarks, 2)),
            os.path.join(save_dir, "recovered_" + str(i) + '.jpg'))
#!/usr/bin/env python
from PIL import Image
import json

from util import BBox, V2
from crop import calculate_composite_bbox_and_dimensions
from crop import create_composite_image
from crop import geo_json_bbox, calculate_crop
from crop import create_boundary_pixels
from crop import crop_to_boundary
from crop import mask_pixels_outside_boundary

tiles = [{
    'bbox': BBox(V2(30, -25), V2(40, -15)),
    'img': Image.open('./input/X21Y09.png').convert('RGBA')
}, {
    'bbox': BBox(V2(30, -35), V2(40, -25)),
    'img': Image.open('./input/X21Y10.png').convert('RGBA')
}]


def run_once(write_outputs=True):
    # 1. Calculate the composite bounding box and dimensions:
    (composite_bbox, composite_dims) = calculate_composite_bbox_and_dimensions(tiles)
    if write_outputs:
        print('composite image bounding box:', composite_bbox)
        print('composite image dimensions:', composite_dims)
    # 2. Create a simple composite image:
    composite_img = create_composite_image(tiles, composite_bbox,
def generate_test_lmdb(meta_txt,
                       img_base_dir,
                       output_dir,
                       lmdb_name,
                       is_color,
                       img_size,
                       num_landmarks=5,
                       num_to_visualize=4,
                       le_index=0,
                       re_index=1):
    """ Generate test lmdb data. """
    data = getDataFromTxt(meta_txt, img_base_dir, num_landmarks=num_landmarks)
    # create the base dir if it does not exist
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    output = os.path.join(output_dir, lmdb_name)
    # open image lmdb
    in_db_img = lmdb.open(output + '_data', map_size=1e12)
    in_txn_img = in_db_img.begin(write=True)
    # open landmarks lmdb
    in_db_lmk = lmdb.open(output + '_landmark', map_size=1e12)
    in_txn_lmk = in_db_lmk.begin(write=True)
    # open bbox lmdb
    in_db_bbox = lmdb.open(output + '_bbox', map_size=1e12)
    in_txn_bbox = in_db_bbox.begin(write=True)
    # open eye center lmdb
    in_db_eye_center = lmdb.open(output + '_eye_center', map_size=1e12)
    in_txn_eye_center = in_db_eye_center.begin(write=True)
    # open eye dist lmdb
    in_db_eyedist = lmdb.open(output + '_eyedist', map_size=1e12)
    in_txn_eyedist = in_db_eyedist.begin(write=True)

    display_bbox = BBox([0, img_size, 0, img_size])
    for i, (img_path, bbox, landmark) in enumerate(data):
        # read image
        if is_color:
            img = cv2.imread(img_path, 1)
        else:
            img = cv2.imread(img_path, 0)
        assert (img is not None)
        log("process %d, %s" % (i, img_path))

        # make sure that when the face bounding box is incorrectly labeled,
        # the coordinate, width and height computations are still correct
        enlarged_bbox = bbox
        enlarged_bbox.misc_clip(img.shape[0], img.shape[1])
        face_original = img[int(enlarged_bbox.top):int(enlarged_bbox.bottom) + 1,
                            int(enlarged_bbox.left):int(enlarged_bbox.right) + 1]
        face_original = cv2.resize(face_original, (img_size, img_size))
        landmark_original = enlarged_bbox.normalize_landmarks(landmark)

        # put original face image and landmark into lmdb
        in_txn_img = put_image_into_txn(in_txn_img, face_original, i)
        in_txn_lmk = put_landmark_into_txn(in_txn_lmk, landmark_original,
                                           num_landmarks, i)
        # put bbox into lmdb
        bbox_info = np.array([
            enlarged_bbox.right - enlarged_bbox.left,
            enlarged_bbox.bottom - enlarged_bbox.top
        ])
        in_txn_bbox = put_label_into_txn(in_txn_bbox, bbox_info, 2, 1, 1, i)
        # put eye center into lmdb
        eye_center_info = np.array([
            landmark[le_index][0] - landmark[re_index][0],
            landmark[le_index][1] - landmark[re_index][1]
        ])
        in_txn_eye_center = put_label_into_txn(in_txn_eye_center,
                                               eye_center_info, 2, 1, 1, i)
        # put eyedist into lmdb
        eyedist = math.sqrt(
            (landmark_original[le_index][0] - landmark_original[re_index][0]) *
            (landmark_original[le_index][0] - landmark_original[re_index][0]) +
            (landmark_original[le_index][1] - landmark_original[re_index][1]) *
            (landmark_original[le_index][1] - landmark_original[re_index][1]))
        eyedist = np.asarray(eyedist)
        in_txn_eyedist = put_label_into_txn(in_txn_eyedist, eyedist, 1, 1, 1, i)

        # for debugging
        if i < num_to_visualize:
            draw_landmark_in_cropped_face(
                face_original,
                display_bbox.denormalize_landmarks(landmark_original),
                os.path.join("visualization",
                             "test_" + os.path.basename(img_path)))
            draw_landmark_in_original_image(
                img, enlarged_bbox, landmark,
                os.path.join("visualization", os.path.basename(img_path)))

        # commit the transactions every 1000 images
        if i % 1000 == 0 and i > 0:
            in_txn_img.commit()
            in_txn_lmk.commit()
            in_txn_bbox.commit()
            in_txn_eye_center.commit()
            in_txn_eyedist.commit()
            in_txn_img = in_db_img.begin(write=True)
            in_txn_lmk = in_db_lmk.begin(write=True)
            in_txn_bbox = in_db_bbox.begin(write=True)
            in_txn_eye_center = in_db_eye_center.begin(write=True)
            in_txn_eyedist = in_db_eyedist.begin(write=True)
            log("transactions committed, %d images processed." % (i))

    in_txn_img.commit()
    in_txn_lmk.commit()
    in_txn_bbox.commit()
    in_txn_eye_center.commit()
    in_txn_eyedist.commit()
    in_db_img.close()
    in_db_lmk.close()
    in_db_bbox.close()
    in_db_eye_center.close()
    in_db_eyedist.close()
def generate_training_lmdb(meta_txt,
                           img_base_dir,
                           output_dir,
                           lmdb_name,
                           is_color,
                           img_size,
                           num_landmarks=5,
                           flipping=True,
                           rotation=True,
                           rotated_flipping=True,
                           rotation_angles=[15, -15],
                           num_to_visualize=4):
    """ Generate training lmdb data. """
    data = getDataFromTxt(meta_txt, img_base_dir, num_landmarks=num_landmarks)
    # create the base dir if it does not exist
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    output = os.path.join(output_dir, lmdb_name)
    # open image lmdb
    in_db_img = lmdb.open(output + '_data', map_size=1e12)
    in_txn_img = in_db_img.begin(write=True)
    # open landmarks lmdb
    in_db_lmk = lmdb.open(output + '_landmark', map_size=1e12)
    in_txn_lmk = in_db_lmk.begin(write=True)

    count = 0
    shuffle_idx = np.random.permutation(500000)
    display_bbox = BBox([0, img_size, 0, img_size])
    for i, (img_path, bbox, landmark) in enumerate(data):
        # read image
        if is_color:
            img = cv2.imread(img_path, 1)
        else:
            img = cv2.imread(img_path, 0)
        assert (img is not None)
        log("process %d, %s" % (i, img_path))

        # make sure that when the face bounding box is incorrectly labeled,
        # the coordinate, width and height computations are still correct
        enlarged_bbox = bbox
        enlarged_bbox.misc_clip(img.shape[0], img.shape[1])
        face_original = img[int(enlarged_bbox.top):int(enlarged_bbox.bottom) + 1,
                            int(enlarged_bbox.left):int(enlarged_bbox.right) + 1]
        face_original = cv2.resize(face_original, (img_size, img_size))
        landmark_original = enlarged_bbox.normalize_landmarks(landmark)

        # put original face image and landmark into lmdb
        in_txn_img = put_image_into_txn(in_txn_img, face_original,
                                        shuffle_idx[count])
        in_txn_lmk = put_landmark_into_txn(in_txn_lmk, landmark_original,
                                           num_landmarks, shuffle_idx[count])
        count += 1

        # for debugging
        if i < num_to_visualize:
            draw_landmark_in_cropped_face(
                face_original,
                display_bbox.denormalize_landmarks(landmark_original),
                os.path.join("visualization",
                             "original_" + os.path.basename(img_path)))
            draw_landmark_in_original_image(
                img, enlarged_bbox, landmark,
                os.path.join("visualization", os.path.basename(img_path)))

        # flipping
        if flipping is True:
            # horizontal flipping
            face_flipped, bbox_flipped, landmark_flipped = flip(
                img, enlarged_bbox, landmark)
            face_flipped = cv2.resize(face_flipped, (img_size, img_size))
            landmark_flipped = bbox_flipped.normalize_landmarks(
                landmark_flipped)
            # put flipped face image and landmark into lmdb
            in_txn_img = put_image_into_txn(in_txn_img, face_flipped,
                                            shuffle_idx[count])
            in_txn_lmk = put_landmark_into_txn(in_txn_lmk, landmark_flipped,
                                               num_landmarks,
                                               shuffle_idx[count])
            count += 1
            # for debugging
            if i < num_to_visualize:
                draw_landmark_in_cropped_face(
                    face_flipped,
                    display_bbox.denormalize_landmarks(landmark_flipped),
                    os.path.join("visualization",
                                 "flipped_" + os.path.basename(img_path)))

        # rotation
        if rotation is True:
            # rotate with probability 100%
            for alpha in rotation_angles:
                if np.random.rand() > -1:
                    img_rotated, face_rotated, landmark_rotated = rotate(
                        img, enlarged_bbox, landmark, alpha)
                    landmark_rotated = enlarged_bbox.normalize_landmarks(
                        landmark_rotated)
                    face_rotated = cv2.resize(face_rotated,
                                              (img_size, img_size))
                    # put rotated face image and landmark into lmdb
                    in_txn_img = put_image_into_txn(in_txn_img, face_rotated,
                                                    shuffle_idx[count])
                    in_txn_lmk = put_landmark_into_txn(in_txn_lmk,
                                                       landmark_rotated,
                                                       num_landmarks,
                                                       shuffle_idx[count])
                    count += 1
                    # for debugging
                    if i < num_to_visualize:
                        draw_landmark_in_cropped_face(
                            face_rotated,
                            display_bbox.denormalize_landmarks(
                                landmark_rotated),
                            os.path.join(
                                "visualization", "rotated_" + str(alpha) +
                                "_" + os.path.basename(img_path)))

                    # horizontal flipping after rotation
                    if rotated_flipping is True:
                        face_flipped, bbox_flipped, landmark_flipped = flip(
                            img_rotated, enlarged_bbox,
                            enlarged_bbox.denormalize_landmarks(
                                landmark_rotated))
                        face_flipped = cv2.resize(face_flipped,
                                                  (img_size, img_size))
                        landmark_flipped = bbox_flipped.normalize_landmarks(
                            landmark_flipped)
                        # put rotated flipped face image and landmark into lmdb
                        in_txn_img = put_image_into_txn(
                            in_txn_img, face_flipped, shuffle_idx[count])
                        in_txn_lmk = put_landmark_into_txn(
                            in_txn_lmk, landmark_flipped, num_landmarks,
                            shuffle_idx[count])
                        count += 1
                        # for debugging
                        if i < num_to_visualize:
                            draw_landmark_in_cropped_face(
                                face_flipped,
                                display_bbox.denormalize_landmarks(
                                    landmark_flipped),
                                os.path.join(
                                    "visualization",
                                    "rotated_flip_" + str(alpha) + "_" +
                                    os.path.basename(img_path)))

        # commit the transactions every 1000 images
        if i % 1000 == 0 and i > 0:
            in_txn_img.commit()
            in_txn_lmk.commit()
            in_txn_img = in_db_img.begin(write=True)
            in_txn_lmk = in_db_lmk.begin(write=True)
            log("transactions committed, %d images processed." % (i))

    in_txn_img.commit()
    in_txn_lmk.commit()
    in_db_img.close()
    in_db_lmk.close()
    log("number of total generated entries: %d" % (count))