def get_qualitative_results(denoise_func):
    """Score the page-level recognition pipeline on the test set.

    For every sample the page is cropped to its paragraph, split into
    lines, and each line is run through the recognition network.  The
    decoded lines are compared against the ground-truth lines via
    ``sclite``.

    Args:
        denoise_func: Callable that receives the output of
            ``handwriting_line_recognition_net`` for one line and returns
            the decoded text for that line.

    Returns:
        The first value returned by ``sclite.get_cer()`` (printed as
        "Mean CER").
    """
    sclite.clear()
    # NOTE(review): iteration starts at index 1, so test_ds[0] is never
    # scored -- confirm that skipping the first sample is intentional.
    for i in tqdm(range(1, len(test_ds))):
        image, text = test_ds[i]

        # Locate the paragraph on the page and crop to it.
        resized_image = paragraph_segmentation_transform(image, image_size=form_size)
        paragraph_bb = paragraph_segmentation_net(resized_image.as_in_context(ctx))
        paragraph_bb = paragraph_bb[0].asnumpy()
        paragraph_bb = expand_bounding_box(paragraph_bb, expand_bb_scale_x=0.01,
                                           expand_bb_scale_y=0.01)
        paragraph_segmented_image = crop_handwriting_page(
            image, paragraph_bb, image_size=segmented_paragraph_size)

        # Detect words, group them into lines, and crop one image per line.
        word_bb = predict_bounding_boxes(word_segmentation_net, paragraph_segmented_image,
                                         min_c, overlap_thres, topk, ctx)
        line_bbs = sort_bbs_line_by_line(word_bb, y_overlap=0.4)
        line_images = crop_line_images(paragraph_segmented_image, line_bbs)

        predicted_text = []
        for line_image in line_images:
            line_image = exposure.adjust_gamma(line_image, 1)
            line_image = handwriting_recognition_transform(line_image, line_image_size)
            character_probabilities = handwriting_line_recognition_net(
                line_image.as_in_context(ctx))
            decoded_text = denoise_func(character_probabilities)
            predicted_text.append(decoded_text)

        # Undo the XML escaping in the ground truth.  "&amp;" is decoded last
        # so that an escaped ampersand cannot create a new entity that would
        # then be decoded a second time.
        actual_text = (text[0]
                       .replace("&quot;", '"')
                       .replace("&apos;", "'")
                       .replace("&amp;", "&"))
        actual_text = actual_text.split("\n")

        # Drop surplus predicted lines so there are never more predictions
        # than reference lines.
        if len(predicted_text) > len(actual_text):
            predicted_text = predicted_text[:len(actual_text)]
        sclite.add_text(predicted_text, actual_text)

    cer, _ = sclite.get_cer()
    print("Mean CER = {}".format(cer))
    return cer
def predict():
    """Recognise handwriting in an uploaded image and render the result page.

    Reads the ``file`` part of a POST request, decodes it as a grayscale
    image, runs paragraph segmentation, word segmentation and line
    recognition, then passes the joined text to ``gc_translate`` and
    ``gc_sentiment``.

    Returns:
        A redirect to the ``home`` endpoint when the request is not a POST,
        has no ``file`` part, or the upload is empty or cannot be decoded
        as an image; otherwise the rendered ``result.html`` template with
        ``prediction`` set to a dict holding ``text``, ``translated``,
        ``polarity`` and ``magnitude``.
    """
    # Without a POSTed 'file' part there is nothing to recognise.
    if request.method != 'POST' or 'file' not in request.files:
        return redirect(url_for('home'))

    # Read the raw upload bytes and decode them into a grayscale image.
    filestr = request.files['file'].read()
    if not filestr:
        return redirect(url_for('home'))
    # frombuffer replaces the deprecated binary mode of np.fromstring.
    npimg = np.frombuffer(filestr, np.uint8)
    img = cv2.imdecode(npimg, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # imdecode yields None when the bytes are not a readable image.
        return redirect(url_for('home'))

    ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()

    # Models
    # NOTE(review): all three networks are rebuilt and their parameters
    # reloaded on every request; consider loading them once at start-up.
    paragraph_segmentation_net = SegmentationNetwork(ctx=ctx)
    paragraph_segmentation_net.cnn.load_parameters(paragraph_segmentation_model, ctx)

    word_segmentation_net = WordSegmentationNet(2, ctx=ctx)
    word_segmentation_net.load_parameters(word_segmentation_model, ctx)

    handwriting_line_recognition_net = HandwritingRecognitionNet(
        rnn_hidden_states=rnn_hidden_states,
        rnn_layers=rnn_layers,
        max_seq_len=max_seq_len,
        ctx=ctx)
    handwriting_line_recognition_net.load_parameters(recognition_model, ctx)

    # Paragraph segmentation, then crop the page to the predicted box.
    MAX_IMAGE_SIZE_FORM = (1120, 800)
    resized_image = paragraph_segmentation_transform(img, image_size=MAX_IMAGE_SIZE_FORM)
    paragraph_bb = paragraph_segmentation_net(resized_image.as_in_context(ctx))
    paragraph_segmented_image = crop_handwriting_page(
        img, paragraph_bb[0].asnumpy(), image_size=segmented_paragraph_size)

    # Word detection, grouping into lines, and per-line crops.
    word_bb = predict_bounding_boxes(word_segmentation_net, paragraph_segmented_image,
                                     min_c, overlap_thres, topk, ctx)
    line_bbs = sort_bbs_line_by_line(word_bb)
    line_images = crop_line_images(paragraph_segmented_image, line_bbs)

    # NOTE(review): denoise_func is not defined in this function; it must be
    # available as a module-level name -- verify.
    predicted_text = []
    for line_image in line_images:
        line_image = handwriting_recognition_transform(line_image, line_image_size)
        character_probabilities = handwriting_line_recognition_net(
            line_image.as_in_context(ctx))
        decoded_text = denoise_func(character_probabilities)
        predicted_text.append(decoded_text)

    text = ' '.join(predicted_text)
    print(text)

    translated = gc_translate(text)
    sentiment = gc_sentiment(text)
    print(translated)
    print(sentiment)

    text_dict = {"text": text,
                 "translated": translated['translatedText'],
                 "polarity": sentiment[0],
                 "magnitude": sentiment[1]}
    return render_template('result.html', prediction=text_dict)
def get_qualitative_results_lines(denoise_func):
    """Score line-level recognition on the IAM "line" test split.

    Each line image is run through the recognition network, decoded with
    ``denoise_func`` and compared against its ground-truth text via
    ``sclite``.

    Args:
        denoise_func: Callable that receives the output of
            ``handwriting_line_recognition_net`` for one line and returns
            the decoded text.

    Returns:
        The first value returned by ``sclite.get_cer()`` (printed as
        "Mean CER").
    """
    sclite.clear()
    test_ds_line = IAMDataset("line", train=False)
    # NOTE(review): iteration starts at index 1, so test_ds_line[0] is never
    # scored -- confirm that skipping the first sample is intentional.
    for i in tqdm(range(1, len(test_ds_line))):
        image, text = test_ds_line[i]
        line_image = exposure.adjust_gamma(image, 1)
        line_image = handwriting_recognition_transform(line_image, line_image_size)
        character_probabilities = handwriting_line_recognition_net(
            line_image.as_in_context(ctx))
        decoded_text = denoise_func(character_probabilities)

        # Undo the XML escaping in the ground truth.  "&amp;" is decoded last
        # so that an escaped ampersand cannot create a new entity that would
        # then be decoded a second time.
        actual_text = (text[0]
                       .replace("&quot;", '"')
                       .replace("&apos;", "'")
                       .replace("&amp;", "&"))
        sclite.add_text([decoded_text], [actual_text])

    cer, _ = sclite.get_cer()
    print("Mean CER = {}".format(cer))
    return cer
# Build the line-recognition network and load its saved parameters.
handwriting_line_recognition_net = HandwritingRecognitionNet(rnn_hidden_states=512, rnn_layers=2, ctx=ctx, max_seq_len=160)
handwriting_line_recognition_net.load_parameters("models/handwriting_line8.params", ctx=ctx)
#%%
# Fetch the sample stored under the key 'cow' and display it.
im = np.array(train_dataset._get_img('cow'))
plt.imshow(im)
#%%
line_image_size = (60, 800)
# The mean over axis 2 collapses the last axis of the image before the
# transform (presumably the colour channels -- TODO confirm).
line_image = handwriting_recognition_transform(np.array(im).mean(axis=2), line_image_size)
line_character_prob = handwriting_line_recognition_net(line_image.as_in_context(ctx))
#%%
# Show the transformed line image that was fed to the network.
plt.figure(figsize=(15,15))
plt.imshow(line_image[0,0,:,:].asnumpy(), cmap='gray')
#%%
from ocr.utils.CTCDecoder.BeamSearch import ctcBeamSearch
#%%
# NOTE(review): only the signature of get_beam_search is visible in this
# chunk; its body lies outside the reviewed range.
def get_beam_search(prob, width=20, k=10):
# NOTE(review): the next two statements look like the tail of a plotting
# loop whose start is outside this chunk (x, y, w, h, s_y and s_x are not
# defined here); restore their original indentation when merging.
rect = patches.Rectangle((x, y), w, h, fill=False, color="r")
axs[s_y, s_x].add_patch(rect)

# Build the line-recognition network, load its saved parameters and
# hybridize it.
handwriting_line_recognition_net = HandwritingRecognitionNet(
    rnn_hidden_states=512, rnn_layers=2, ctx=ctx, max_seq_len=160)
handwriting_line_recognition_net.load_parameters(
    "models/handwriting_line8.params", ctx=ctx)
handwriting_line_recognition_net.hybridize()

line_image_size = (60, 800)
# character_probs[f][l] holds the network output for line l of form f.
character_probs = []
for line_images in line_images_array:
    form_character_prob = []
    for i, line_image in enumerate(line_images):
        line_image = handwriting_recognition_transform(line_image, line_image_size)
        line_character_prob = handwriting_line_recognition_net(
            line_image.as_in_context(ctx))
        form_character_prob.append(line_character_prob)
    character_probs.append(form_character_prob)


def get_arg_max(prob):
    '''
    The greedy algorithm converts the output of the handwriting recognition
    network into a string.

    Takes ``prob.topk(axis=2)`` (topk with its default arguments), passes
    the result to ``decoder_handwriting`` and returns the first decoded
    entry.
    '''
    arg_max = prob.topk(axis=2).asnumpy()
    return decoder_handwriting(arg_max)[0]
def generate_op(img_n, img_dir, folder_path):
    """Recognise the handwriting in one form image and save it as text.

    The image is cropped to its predicted paragraph, split into lines, and
    each line is decoded with ``get_beam_search``.  Every decoded line is
    printed and written, followed by a single space, to
    ``<folder_path>/<image_name>.txt``.

    Args:
        img_n: File name of the image inside ``img_dir``.  The output file
            is named after the part of ``img_n`` before its first dot.
        img_dir: Directory containing the image.
        folder_path: Directory in which the ``.txt`` file is written.

    Returns:
        None.
    """
    image_name = img_n.split('.')[0]
    img_path = os.path.join(img_dir, img_n)
    image = _pre_process_image(img_path, 'form')

    # Paragraph segmentation: predict the paragraph box and enlarge it a bit.
    form_size = (1120, 800)
    resized_image = paragraph_segmentation_transform(image, form_size)
    bb_predicted = paragraph_segmentation_net(resized_image.as_in_context(ctx))
    bb_predicted = bb_predicted[0].asnumpy()
    bb_predicted = expand_bounding_box(bb_predicted, expand_bb_scale_x=0.03,
                                       expand_bb_scale_y=0.03)

    # Crop the page down to the paragraph.
    segmented_paragraph_size = (700, 700)
    paragraph_segmented_image = crop_handwriting_page(
        image, bb_predicted, image_size=segmented_paragraph_size)

    # Word detection inside the paragraph.
    min_c = 0.1
    overlap_thres = 0.1
    topk = 600
    predicted_words_bbs = predict_bounding_boxes(
        word_segmentation_net, paragraph_segmented_image,
        min_c, overlap_thres, topk, ctx)

    # Group the word boxes into lines and crop one image per line.
    line_bbs = sort_bbs_line_by_line(predicted_words_bbs, y_overlap=0.4)
    line_images = crop_line_images(paragraph_segmented_image, line_bbs)

    # Run the recognition network on every line before touching the output
    # file, so a failure during inference does not leave an empty file.
    line_image_size = (60, 800)
    line_character_probs = []
    for line_image in line_images:
        line_image = handwriting_recognition_transform(line_image, line_image_size)
        line_character_probs.append(
            handwriting_line_recognition_net(line_image.as_in_context(ctx)))

    save_path = os.path.join(folder_path, image_name + '.txt')
    # The context manager closes the file even if decoding raises.
    with open(save_path, 'w') as out_file:
        for probs in line_character_probs:
            decoded_line_bs = get_beam_search(probs)
            print(decoded_line_bs)
            out_file.write(decoded_line_bs + ' ')