def prediction_fn(model_dir: str,
                  input_dir: str,
                  output_dir: str = None,
                  config: tf.ConfigProto = None) -> None:
    """
    Given a model directory, loads the model and applies it to the image files
    (.jpg, .png) found in ``input_dir``. The predictions are saved in ``output_dir``
    as .npy files (values ranging [0, 255]).

    :param model_dir: directory containing the saved model
    :param input_dir: input directory containing the images to predict
    :param output_dir: output directory to save the predictions (probability images)
    :param config: ``ConfigProto`` object to pass to the session in order to define which GPU to use
    :return:
    """
    if not output_dir:
        # For a model_dir of the form model_name/export/timestamp/ this creates a folder model_name/predictions
        output_dir = os.path.sep.join(model_dir.split(os.path.sep)[:-3] + ['predictions'])

    os.makedirs(output_dir, exist_ok=True)
    filenames_to_predict = glob(os.path.join(input_dir, '*.jp*g')) + glob(os.path.join(input_dir, '*.png'))

    with tf.Session(config=config):
        m = LoadedModel(model_dir, predict_mode='filename_original_shape')
        for filename in tqdm(filenames_to_predict, desc='Prediction'):
            pred = m.predict(filename)['probs'][0]
            np.save(os.path.join(output_dir, os.path.basename(filename).split('.')[0]),
                    np.uint8(255 * pred))
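# Example usage (a minimal sketch): the export path and image folder below are
# hypothetical placeholders, and restricting the GPU via ConfigProto is optional.
if __name__ == '__main__':
    session_config = tf.ConfigProto()
    session_config.gpu_options.visible_device_list = '0'  # assumption: expose only the first GPU
    prediction_fn(model_dir='model_name/export/1564890371',
                  input_dir='data/images',
                  config=session_config)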
def baseline_extraction(model_dir: str,
                        filenames_to_process: List[str],
                        output_dir: str,
                        draw_extractions: bool = False,
                        config: tf.compat.v1.ConfigProto = None) -> None:
    """
    Given a model directory, loads the model and applies it to the given files.

    :param model_dir: directory containing the saved model
    :param filenames_to_process: filenames of the images to process
    :param output_dir: output directory to save the predictions (probability images)
    :param draw_extractions: if True, also saves the images with the extracted baselines drawn on them
    :param config: ``ConfigProto`` object for ``tf.Session``.
    :return:
    """
    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        os.makedirs(drawing_dir, exist_ok=True)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename_original_shape')

        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # Inference
            prediction = m.predict(filename)
            # Take the first element of the 'probs' dictionary (batch size = 1)
            probs = prediction['probs'][0]
            original_shape = probs.shape

            # The baseline probabilities are on the second channel
            baseline_probs = probs[:, :, 1]
            contours, _ = line_extraction_v1(baseline_probs,
                                             low_threshold=0.2,
                                             high_threshold=0.4,
                                             sigma=1.5)

            basename = os.path.basename(filename).split('.')[0]

            # Compute the ratio to save the coordinates in the original image coordinates reference
            # (with predict_mode='filename_original_shape' the prediction already has the original
            # shape, so this ratio is 1)
            ratio = (original_shape[0] / probs.shape[0], original_shape[1] / probs.shape[1])
            xml_filename = os.path.join(output_dir, basename + '.xml')
            page_object = PAGE.save_baselines(xml_filename,
                                              contours,
                                              ratio,
                                              predictions_shape=probs.shape[:2])

            # If specified, save the images with the annotated baselines
            if draw_extractions:
                image = imread(filename)
                page_object.draw_baselines(image, color=(255, 0, 0), thickness=5)
                basename = os.path.basename(filename)
                imsave(os.path.join(drawing_dir, basename), image)
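# Example usage (a sketch): the export path and glob pattern are assumptions about
# the local layout; baseline_extraction only needs a list of image paths.
filenames = glob('data/images/*.jpg')
baseline_extraction(model_dir='baseline_model/export',
                    filenames_to_process=filenames,
                    output_dir='output/baselines',
                    draw_extractions=True)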
def main(input_dir, output_dir, model_dir, classes_file):
    if not os.path.isdir(input_dir):
        print('No such input directory: {}'.format(input_dir))
        sys.exit(1)
    if not os.path.isdir(model_dir):
        print('No such model directory: {}'.format(model_dir))
        sys.exit(2)
    if not os.path.isfile(classes_file):
        print('No such classes file: {}'.format(classes_file))
        sys.exit(3)

    input_files = glob('{}/*'.format(input_dir))

    raw_dir = os.path.join(output_dir, 'raw')
    raw_overlays_dir = os.path.join(output_dir, 'raw_overlays')
    os.makedirs(raw_dir, exist_ok=True)
    os.makedirs(raw_overlays_dir, exist_ok=True)

    # Shape [num_classes, 3] (3 is for RGB)
    class_colors = np.array(get_classes_color_from_file(classes_file), dtype=np.uint8)
    num_classes = class_colors.shape[0]

    with tf.Session():
        m = LoadedModel(model_dir, predict_mode='filename')

        for filename in tqdm(input_files, desc='Processed files'):
            rootname, _ = os.path.splitext(filename)
            basename = os.path.basename(rootname + '.png')

            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']
            assert probs.shape[2] == num_classes

            # Shape: (h, w)
            class_map = probs.argmax(axis=-1)
            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            class_map_upscaled = cv2.resize(class_map.astype(np.uint8, copy=False),
                                            tuple(original_shape[::-1]),
                                            interpolation=cv2.INTER_NEAREST)
            # Shape: (h', w', 3)
            color_map = np.take(class_colors, class_map_upscaled, axis=0)

            raw = Image.fromarray(color_map)
            raw.save(os.path.join(raw_dir, basename), 'PNG')

            original_img = Image.open(filename).convert('RGBA')
            predicted_mask = Image.fromarray(color_map).convert('RGBA')
            raw_overlay = Image.blend(original_img, predicted_mask, 0.5)
            raw_overlay.save(os.path.join(raw_overlays_dir, basename), 'PNG')
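# The class-map-to-color-map step above relies on np.take indexing the palette
# with the per-pixel class index. A minimal, self-contained sketch of that lookup:
import numpy as np

palette = np.array([[0, 0, 0],        # class 0: background -> black
                    [255, 0, 0],      # class 1 -> red
                    [0, 255, 0]],     # class 2 -> green
                   dtype=np.uint8)
tiny_class_map = np.array([[0, 1], [2, 1]], dtype=np.uint8)   # a tiny (2, 2) label image
tiny_color_map = np.take(palette, tiny_class_map, axis=0)     # shape (2, 2, 3)
assert tiny_color_map[0, 1].tolist() == [255, 0, 0]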
def page_extraction(model_dir: str,
                    filenames_to_process: List[str],
                    output_dir: str,
                    draw_extractions: bool = False,
                    config: tf.compat.v1.ConfigProto = None):

    os.makedirs(output_dir, exist_ok=True)
    if draw_extractions:
        drawing_dir = os.path.join(output_dir, 'drawings')
        os.makedirs(drawing_dir, exist_ok=True)

    with tf.compat.v1.Session(config=config):
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')
        for filename in tqdm(filenames_to_process, desc='Prediction'):
            # Inference
            prediction = m.predict(filename)
            probs = prediction['probs'][0]
            original_shape = prediction['original_shape']
            # Compute the basename here so it is available for the XML export below
            basename = os.path.basename(filename).split('.')[0]

            probs = probs / np.max(probs)  # Normalize to be in [0, 1]

            # Binarize the predictions
            page_bin = page_post_processing_fn(probs, threshold=-1)

            # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            # Find the quadrilateral enclosing the page
            pred_page_coords = find_boxes(bin_upscaled.astype(np.uint8, copy=False),
                                          mode='min_rectangle',
                                          min_area=0.2,
                                          n_max_boxes=1)

            if pred_page_coords is not None:
                # Create the page region for the XML file
                page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))

                if draw_extractions:
                    # Draw the page box on the original image and export it
                    original_img = imread(filename, pilmode='RGB')
                    cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5)
                    imsave(os.path.join(drawing_dir, '{}_boxes.jpg'.format(basename)), original_img)
            else:
                print('No box found in {}'.format(filename))
                page_border = PAGE.Border()

            page_xml = PAGE.Page(image_filename=filename,
                                 image_width=original_shape[1],
                                 image_height=original_shape[0],
                                 page_border=page_border)
            xml_filename = os.path.join(output_dir, '{}.xml'.format(basename))
            page_xml.write_to_file(xml_filename, creator_name='PageExtractor')
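# For intuition, a rough sketch of what the 'min_rectangle' mode amounts to: find
# the contours of the binary mask and take the minimum-area rotated rectangle
# around the largest one. This is an illustration only, not dhSegment's actual
# find_boxes implementation (signature assumes OpenCV 4.x).
def min_rectangle_sketch(mask):
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None
    largest = max(contours, key=cv2.contourArea)
    rect = cv2.minAreaRect(largest)              # ((cx, cy), (w, h), angle)
    return cv2.boxPoints(rect).astype(np.int64)  # the 4 corner points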
def run(testDir, modelDir, outDir, gpu, _config):
    # Create output directory
    os.makedirs(outDir, exist_ok=True)

    # I/O
    files = glob(testDir + '/*')

    mask_unused_gpus(2)

    with tf.Session():  # Start a tensorflow session
        # Load the model
        model = LoadedModel(modelDir, predict_mode='filename')

        for filename in tqdm(files, desc='Processed files'):
            basename = os.path.basename(filename).split('.')[0]

            # Skip files that have already been processed
            if os.path.exists(os.path.join(outDir, basename + '.xml')):
                print(basename + " skipped...")
                continue

            prediction_outputs = model.predict(filename)
            probs = prediction_outputs['probs'][0]
            probs = probs.astype(float)

            img = imread(filename)
            # pageSeparatorsToXml(probs, img.shape, filename, outDir)

            imsave(os.path.join(outDir, basename + '-articles.png'), convert_image(probs))
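# mask_unused_gpus presumably limits TensorFlow to free GPUs. A plausible sketch
# (an assumption; the real helper lives elsewhere in this repo): query nvidia-smi
# for free memory and expose only the N devices with enough headroom.
import subprocess

def mask_unused_gpus_sketch(leave_unmasked=1, min_free_mb=1024):
    out = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=memory.free', '--format=csv,noheader,nounits'])
    free_mb = [int(x) for x in out.decode().strip().split('\n')]
    usable = [i for i, free in enumerate(free_mb) if free > min_free_mb]
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, usable[:leave_unmasked]))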
output_dir = 'demo/processed_images'
os.makedirs(output_dir, exist_ok=True)
# PAGE XML format output
output_pagexml_dir = os.path.join(output_dir, PAGE_XML_DIR)
os.makedirs(output_pagexml_dir, exist_ok=True)

# Store coordinates of page in a .txt file
txt_coordinates = ''

with tf.compat.v1.Session():  # Start a tensorflow session
    # Load the model
    m = LoadedModel(model_dir, predict_mode='filename')

    for filename in tqdm(input_files, desc='Processed files'):
        # For each image, predict each pixel's label
        prediction_outputs = m.predict(filename)
        probs = prediction_outputs['probs'][0]
        original_shape = prediction_outputs['original_shape']
        probs = probs[:, :, 1]  # Take only class '1' (class 0 is the background, class 1 is the page)
        probs = probs / np.max(probs)  # Normalize to be in [0, 1]

        # Binarize the predictions
        page_bin = page_make_binary_mask(probs)

        # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)
        bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                                  tuple(original_shape[::-1]),
                                  interpolation=cv2.INTER_NEAREST)

        # Find quadrilateral enclosing the page
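# Stepping back: page_make_binary_mask is defined earlier in the demo script. A
# minimal stand-in (an assumption, not the demo's exact code) thresholds the
# [0, 1] probability map, falling back to Otsu's method when no threshold is given:
def page_make_binary_mask_sketch(probs, threshold=-1):
    probs_u8 = np.uint8(probs * 255)
    if threshold < 0:  # let Otsu pick the threshold
        _, mask = cv2.threshold(probs_u8, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    else:
        _, mask = cv2.threshold(probs_u8, int(threshold * 255), 255, cv2.THRESH_BINARY)
    return mask // 255  # binary {0, 1} mask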
# Read image
img = cv2.imread(input_path)

# Parse filename
basename = os.path.basename(input_path)
basename_wo_ext = os.path.splitext(basename)[0]
fl.write("input_path\t\t: {}\n".format(input_path))
fl.write("basename\t\t: {}\n".format(basename))
fl.write("basename (w/o ext)\t: {}\n".format(basename_wo_ext))

""" 2. Main
Run prediction
"""
# Run prediction
prediction_outputs = m.predict(input_path)
pred_labels = np.copy(prediction_outputs['labels'][0]).astype(np.uint8)

""" 2. Main
Get basic attributes
"""
oriH, oriW = np.shape(img)[:2]
newH, newW = np.shape(pred_labels)
data_page.set("HEIGHT", str(oriH))
data_page.set("WIDTH", str(oriW))

""" 3. Postprocessing
class TextLineDetector():
    def __init__(self, model_dir, debug=True):
        self.sess = tf.Session()
        with self.sess.as_default():
            self.model = LoadedModel(model_dir, predict_mode="image")
        self.debug = debug
        self.debug_dir = Path("./dhSegment_debug/")
        self.debug_dir.mkdir(exist_ok=True)

    def __exit__(self, exc_type, exc_value, traceback):
        self.sess.close()

    def detect(self, img, model_predict_h_w=None):
        '''
        Input:
            img: a BGR image (np.ndarray)
        Return:
            cv2-style contours
        Reference:
            https://github.com/dhlab-epfl/fdh-tutorials/blob/master/computer-vision-deep-learning/3-applications/dl-document-processing-textlines/fdh_document_processing.ipynb
        '''
        assert isinstance(img, np.ndarray) and len(img.shape) == 3
        assert model_predict_h_w is None or isinstance(model_predict_h_w, tuple)

        with self.sess.as_default():
            # Deep-learning-based textline detection
            start = time.time()
            # Note: the model takes an RGB image as input
            output_textline = self.model.predict(img[:, :, [2, 1, 0]])
            if self.debug:
                print("[!] The model took {} secs to predict this image".format(time.time() - start))
            textline_probs = output_textline['probs'][0, :, :, 1]
            if self.debug:
                plt.imshow(textline_probs)
                plt.savefig(str(self.debug_dir / "textline_probability.png"))

            # The higher the sigma, the fewer textlines we get
            textline_probs2 = cleaning_probs(textline_probs, sigma=1)
            textline_mask = hysteresis_thresholding(textline_probs2,
                                                    low_threshold=0.3,
                                                    high_threshold=0.6,
                                                    candidates_mask=None)
            if self.debug:
                plt.imshow(textline_mask)
                plt.savefig(str(self.debug_dir / "textline_mask.png"))

            start = time.time()
            line_contours = line_vectorization.find_lines(resize(textline_mask, img.shape[0:2]))
            if self.debug:
                print("[!] Find lines took {} secs".format(time.time() - start))

            if self.debug:
                drawn_line_img = cv2.drawContours(img.copy(), line_contours, -1, (0, 255, 0), thickness=3)
                cv2.imwrite(str(self.debug_dir / "drawn_line_img.png"), drawn_line_img)

            return line_contours
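# Example usage (a sketch; the model path and image file are placeholders):
detector = TextLineDetector(model_dir='polylines/export/1564890371', debug=True)
image = cv2.imread('page.jpg')  # cv2 loads BGR, which detect() expects
contours = detector.detect(image)
print('Found {} text lines'.format(len(contours)))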
"""Download a single test image using URL to local server""" # Download image !wget -O 'my_test_image.jp2' -A jpeg,jpg,jp2 $TEST_IMAGE_URL # Convert format from jp2 to jpg img = cv2.imread('./my_test_image.jp2',cv2.IMREAD_GRAYSCALE) cv2.imwrite('./images/my_test_image.jpg',img) # Remove jp2 !rm my_test_image.jp2 """Load a test image""" file_to_process = './dataset/ENP_500/train/images/00674399.jpg' img = cv2.imread(file_to_process) """Run prediction""" prediction_outputs = m.predict(file_to_process) """POST PROCESSING""" # Generate polygon regions CONNECTIVITY = 8 THRESHOLD_SM_ZONE = 200*200 # Prepare img_copy for drawing Bounding-box img_bb = np.copy(img) img_pg = np.copy(img) newH,newW = np.shape(img)[:2] # Build figure mask from prediction pred_figures = np.copy(prediction_outputs['labels'][0]).astype(np.uint8) prob_figures = np.copy(prediction_outputs['probs'][0][:,:,2]) mask_figures = np.copy(pred_figures)
print("") print(" Post-Model Training") print("") # For each image for filename in tqdm(input_file_list, desc='Processed files'): basename = os.path.basename(filename).split('.')[0] page_number_parsed = -1 try: page_number_parsed = int(basename.split("_")[-1]) except Exception: pass start_time_sec = time.time() # # predict each pixel's label # prediction_outputs = tf_model.predict(filename) finish_time_sec = time.time() # labels_all has shape (h,w) which is like (976, 737) labels_all = prediction_outputs['labels'][0] probs_all = prediction_outputs['probs'][0] # probs_all have shape like (976, 737, 4) corresponding to (H, W, Nclasses) original_shape = prediction_outputs['original_shape'] if (page_number_counted != page_number_parsed): page_number = page_number_parsed if not warned_on_file_ordering: warned_on_file_ordering = True print( f" WARN: page number mismatch: parse={page_number_parsed} vs. count={page_number_counted}" ) else: page_number = page_number_counted
def evaluate_fnc(model_dir):
    model_name = os.path.basename(os.path.normpath(model_dir))
    model_json_file = os.path.join(model_dir, 'config.json')

    if not os.path.isfile(model_json_file):
        print('Sorry, I could not load the config.json file')
        return

    with open(model_json_file) as f:
        data = json.load(f)

    train_data_dir = data["train_data"]
    print('train data: ' + train_data_dir)

    # Generate the test data dir by replacing the last occurrence of /train with /train_and_val
    # (see the small regex demonstration after this function)
    test_data_dir = re.sub('(/train)(?!.*/train)', '/train_and_val', train_data_dir)
    # test_data_dir = re.sub('(/train)(?!.*/train)', '/test', train_data_dir)
    print('saving to : ' + test_data_dir)

    test_data_dir = os.path.join(test_data_dir, 'images')
    print('test_data_dir: ' + test_data_dir)

    if not os.path.isdir(test_data_dir):
        print('Sorry, the test folder does not exist: ' + test_data_dir)
        return

    # Generate the results dir as a child of the parent folder of train_data_dir
    result_parent_dir = os.path.join(os.path.dirname(train_data_dir), 'results_train_and_val')
    output_dir = os.path.join(result_parent_dir, model_name)

    # TODO: read this from json
    use_ms = True

    # TODO: is this necessary?
    model_dir = os.path.join(model_dir, 'export')

    if use_ms:
        input_image_filenames = glob(os.path.join(test_data_dir, '*.jpg'), recursive=False) + \
                                glob(os.path.join(test_data_dir, '*.png'), recursive=False)
        # Strip the channel suffix (e.g. '_1', '_12') so each MSI image is listed once
        input_image_filenames = [re.sub(r'_\d\d?', '', f) for f in input_image_filenames]
        input_files = set(input_image_filenames)
        print('Found {} MSI images'.format(len(input_files)))
    else:
        input_image_filenames = glob(os.path.join(test_data_dir, '*.jpg'), recursive=False) + \
                                glob(os.path.join(test_data_dir, '*.png'), recursive=False)
        input_files = input_image_filenames
        print('Found {} images'.format(len(input_files)))

    mask_unused_gpus.mask_unused_gpus(2)

    os.makedirs(output_dir, exist_ok=True)

    with tf.Session():  # Start a tensorflow session
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')

        # Count the trainable parameters of the network
        total_parameters = 0
        for variable in tf.trainable_variables():
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        # print('number of network parameters: ' + str(total_parameters))

        # Iterate over the images:
        for filename in tqdm(input_files, desc='Processed files'):
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']

            if use_ms:
                filename = re.sub(r'\.png', '_2.png', filename)

            img = cv.imread(filename, cv.IMREAD_COLOR)

            # Just use the fg class:
            p = probs[:, :, 1] * 255
            bin_upscaled = cv.resize(p.astype(np.uint8, copy=False),
                                     tuple(original_shape[::-1]),
                                     interpolation=cv.INTER_CUBIC)
            b = (bin_upscaled > 127) * 255
            b = np.array(b, dtype=np.uint8)
            b_rgb = np.zeros((bin_upscaled.shape[0], bin_upscaled.shape[1], 3))
            b_rgb[:, :, 1] = b

            # Do we have a second fg class?
            if probs.shape[2] == 3:
                p_fg2 = probs[:, :, 2] * 255
                bin_upscaled_fg2 = cv.resize(p_fg2.astype(np.uint8, copy=False),
                                             tuple(original_shape[::-1]),
                                             interpolation=cv.INTER_CUBIC)
                b_fg2 = (bin_upscaled_fg2 > 127) * 255
                b_fg2 = np.array(b_fg2, dtype=np.uint8)
                b_rgb[:, :, 2] = b_fg2

            filename = re.sub(r'_\d\.png', '.png', os.path.basename(filename))
            full_filename = os.path.join(output_dir, filename)
            cv.imwrite(full_filename, b_rgb)
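# The negative lookahead in the substitution inside evaluate_fnc is what makes it
# replace only the LAST '/train' in the path. A small demonstration (re is already
# imported at the top of this script):
path = '/data/train/corpus/train/images'
print(re.sub('(/train)(?!.*/train)', '/test', path))
# -> '/data/train/corpus/test/images' (only the final '/train' is replaced)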
def main(input_dir,
         model_dir,
         out_dir,
         raw_out_dir=None,
         min_area=0.0005,
         overlay_alpha=127,
         box_color=(255, 0, 0)):
    os.makedirs(out_dir, exist_ok=True)
    if raw_out_dir:
        os.makedirs(raw_out_dir, exist_ok=True)

    input_files = glob('{}/*'.format(input_dir))

    with tf.Session():
        # Load the model
        m = LoadedModel(model_dir, predict_mode='filename')

        for filename in tqdm(input_files, desc='Processed files'):
            basename = os.path.basename(filename).split('.')[0]

            # For each image, predict each pixel's label
            prediction_outputs = m.predict(filename)
            probs = prediction_outputs['probs'][0]
            original_shape = prediction_outputs['original_shape']

            # Take only class '1'
            # (class 0 is the background, class 1 is the annotation.)
            probs = probs[:, :, 1]
            probs = probs / np.max(probs)  # Normalize to be in [0, 1]

            # Binarize the predictions
            preds_bin = make_binary_mask(probs)

            # Upscale to have full resolution image
            # (cv2 uses (w,h) and not (h,w) for giving shapes)
            bin_upscaled = cv2.resize(preds_bin.astype(np.uint8, copy=False),
                                      tuple(original_shape[::-1]),
                                      interpolation=cv2.INTER_NEAREST)

            if raw_out_dir:
                # If requested, draw the binary mask as an overlay
                # over the image and save it.
                img = Image.open(filename)
                img = img.convert('RGBA')
                overlay_arr = np.stack(
                    [
                        bin_upscaled * box_color[0],                # R
                        bin_upscaled * box_color[1],                # G
                        bin_upscaled * box_color[2],                # B
                        np.ones_like(bin_upscaled) * overlay_alpha  # A
                    ],
                    axis=2)
                overlay = Image.fromarray(overlay_arr, mode='RGBA')
                img.paste(overlay, (0, 0), overlay)
                img.save(os.path.join(raw_out_dir, '{}_raw.png'.format(basename)), 'PNG')

            # Find quadrilaterals enclosing the annotations
            boxes = boxes_detection.find_boxes(
                bin_upscaled.astype(np.uint8, copy=False),
                min_area=min_area,
                mode='min_rectangle',
            )

            # Draw boxes on the original image.
            original_img = imread(filename, pilmode='RGB')
            if boxes is not None:
                cv2.polylines(original_img, boxes, True, box_color, thickness=5)
            else:
                print('No annotation found in {}'.format(filename))
            imsave(os.path.join(out_dir, '{}_boxes.jpg'.format(basename)), original_img)
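# Example invocation (a sketch; the paths are placeholders, and min_area is the
# minimum fraction of the image an annotation must cover to be kept):
main(input_dir='data/scans',
     model_dir='annotation_model/export',
     out_dir='output/boxes',
     raw_out_dir='output/raw_overlays',
     min_area=0.0005)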