def get_qualitative_results(denoise_func):
    sclite.clear()
    for i in tqdm(range(1, len(test_ds))):  # note: starts at index 1, skipping the first test sample
        image, text = test_ds[i]
        resized_image = paragraph_segmentation_transform(image, image_size=form_size)
        paragraph_bb = paragraph_segmentation_net(resized_image.as_in_context(ctx))
        paragraph_bb = paragraph_bb[0].asnumpy()
        paragraph_bb = expand_bounding_box(paragraph_bb, expand_bb_scale_x=0.01,
                                               expand_bb_scale_y=0.01)
        paragraph_segmented_image = crop_handwriting_page(image, paragraph_bb, image_size=segmented_paragraph_size)
        word_bb = predict_bounding_boxes(word_segmentation_net, paragraph_segmented_image, min_c, overlap_thres, topk, ctx)        
        line_bbs = sort_bbs_line_by_line(word_bb, y_overlap=0.4)
        line_images = crop_line_images(paragraph_segmented_image, line_bbs)

        predicted_text = []
        for line_image in line_images:
            # gamma of 1 is the identity transform; raise or lower it to
            # darken or lighten strokes before recognition
            line_image = exposure.adjust_gamma(line_image, 1)
            line_image = handwriting_recognition_transform(line_image, line_image_size)
            character_probabilities = handwriting_line_recognition_net(line_image.as_in_context(ctx))
            decoded_text = denoise_func(character_probabilities)
            predicted_text.append(decoded_text)

        actual_text = text[0].replace("&quot;", '"').replace("&apos;", "'").replace("&amp;", "&")
        actual_text = actual_text.split("\n")
        if len(predicted_text) > len(actual_text):
            predicted_text = predicted_text[:len(actual_text)]
        sclite.add_text(predicted_text, actual_text)
    
    cer, _ = sclite.get_cer()
    print("Mean CER = {}".format(cer))
    return cer
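
For reference, here is a minimal sketch of a greedy CTC decoder that could be passed in as denoise_func. The ALPHABET string and the (1, seq_len, vocab_size) output shape are assumptions about the recognition network, not taken from the source.

import numpy as np

# Hypothetical vocabulary; index 0 is assumed to be the CTC blank.
ALPHABET = " !\"#&'()*+,-./0123456789:;?abcdefghijklmnopqrstuvwxyz"

def greedy_ctc_decode(character_probabilities):
    # character_probabilities: NDArray of assumed shape (1, seq_len, vocab_size)
    best = character_probabilities[0].asnumpy().argmax(axis=1)
    decoded, prev = [], -1
    for idx in best:
        if idx != prev and idx != 0:  # collapse repeats, drop blanks
            decoded.append(ALPHABET[idx - 1])
        prev = idx
    return "".join(decoded)

# cer = get_qualitative_results(greedy_ctc_decode)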
Example #2
def predict():
    if request.method == 'POST':
        # read image file string data
        if 'file' not in request.files:
            return redirect(url_for('home'))
        else:
            filestr = request.files['file'].read()
            # convert string data to numpy array
            npimg = np.frombuffer(filestr, np.uint8)
            # convert numpy array to image
            img = cv2.imdecode(npimg, cv2.IMREAD_GRAYSCALE)
            # use the GPU if one is available, otherwise fall back to CPU
            ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()

            # Models
            paragraph_segmentation_net = SegmentationNetwork(ctx=ctx)
            paragraph_segmentation_net.cnn.load_parameters(paragraph_segmentation_model, ctx)

            word_segmentation_net = WordSegmentationNet(2, ctx=ctx)
            word_segmentation_net.load_parameters(word_segmentation_model, ctx)

            handwriting_line_recognition_net = HandwritingRecognitionNet(rnn_hidden_states=rnn_hidden_states,
                                                                         rnn_layers=rnn_layers,
                                                                         max_seq_len=max_seq_len,
                                                                         ctx=ctx)
            handwriting_line_recognition_net.load_parameters(recognition_model, ctx)

            MAX_IMAGE_SIZE_FORM = (1120, 800)

            img_arr = np.asarray(img)

            resized_image = paragraph_segmentation_transform(img_arr, image_size=MAX_IMAGE_SIZE_FORM)
            paragraph_bb = paragraph_segmentation_net(resized_image.as_in_context(ctx))
            paragraph_segmented_image = crop_handwriting_page(img_arr, paragraph_bb[0].asnumpy(),
                                                              image_size=segmented_paragraph_size)
            word_bb = predict_bounding_boxes(word_segmentation_net, paragraph_segmented_image, min_c, overlap_thres, topk,
                                             ctx)
            line_bbs = sort_bbs_line_by_line(word_bb)
            line_images = crop_line_images(paragraph_segmented_image, line_bbs)

            predicted_text = []

            for line_image in line_images:
                line_image = handwriting_recognition_transform(line_image, line_image_size)
                character_probabilities = handwriting_line_recognition_net(line_image.as_in_context(ctx))
                # denoise_func is assumed to be defined at module scope
                decoded_text = denoise_func(character_probabilities)
                predicted_text.append(decoded_text)
            text = ' '.join(predicted_text)
            print(text)
            translated = gc_translate(text)
            sentiment = gc_sentiment(text)
            print(translated)
            print(sentiment)
            text_dict = {"text": text,
                         "translated": translated['translatedText'],
                         "polarity": sentiment[0],
                         "magnitude": sentiment[1]}

            return render_template('result.html', prediction=text_dict)
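
A sketch of how predict() might be wired into a Flask app. The route, app entry point, and POST-only registration are assumptions, not shown in the source.

from flask import Flask, request, redirect, url_for, render_template

app = Flask(__name__)
# Hypothetical route; registering POST only, since predict() handles no other method.
app.add_url_rule('/predict', view_func=predict, methods=['POST'])

if __name__ == '__main__':
    app.run()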
paragraph_segmentation_net = SegmentationNetwork(ctx=ctx)
paragraph_segmentation_net.cnn.load_parameters(
    "models/paragraph_segmentation2.params", ctx=ctx)

paragraph_segmentation_net.hybridize()
form_size = (1120, 800)

predicted_bbs = []

# assumes len(images) is even: the grid has two columns
fig, axs = plt.subplots(int(len(images) / 2),
                        2,
                        figsize=(15, 9 * len(images) / 2))
for i, image in enumerate(images):
    s_y, s_x = int(i / 2), int(i % 2)
    resized_image = paragraph_segmentation_transform(image, form_size)
    bb_predicted = paragraph_segmentation_net(resized_image.as_in_context(ctx))
    bb_predicted = bb_predicted[0].asnumpy()
    bb_predicted = expand_bounding_box(bb_predicted,
                                       expand_bb_scale_x=0.03,
                                       expand_bb_scale_y=0.03)
    predicted_bbs.append(bb_predicted)

    axs[s_y, s_x].imshow(image, cmap='Greys_r')
    axs[s_y, s_x].set_title("{}".format(i))

    (x, y, w, h) = bb_predicted
    image_h, image_w = image.shape[-2:]
    (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)
    rect = patches.Rectangle((x, y), w, h, fill=False, color="r", ls="--")
    axs[s_y, s_x].add_patch(rect)
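
For intuition, here is a plausible sketch of what expand_bounding_box does with a normalized (x, y, w, h) box: grow it around its centre by the given scales and clip to the unit square. This is an assumption about the helper, not its actual implementation.

def expand_bbox_sketch(bb, expand_bb_scale_x=0.03, expand_bb_scale_y=0.03):
    # bb is (x, y, w, h) in normalized [0, 1] page coordinates
    x, y, w, h = bb
    dw, dh = w * expand_bb_scale_x, h * expand_bb_scale_y
    x, y = max(x - dw / 2, 0.0), max(y - dh / 2, 0.0)
    w, h = min(w + dw, 1.0 - x), min(h + dh, 1.0 - y)
    return (x, y, w, h)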
def generate_op(img_n, img_dir, folder_path):
    image_name = img_n.split('.')[0]
    img_path = os.path.join(img_dir, img_n)
    image = _pre_process_image(img_path, 'form')

    form_size = (1120, 800)

    predicted_bbs = []

    resized_image = paragraph_segmentation_transform(image, form_size)
    bb_predicted = paragraph_segmentation_net(resized_image.as_in_context(ctx))
    bb_predicted = bb_predicted[0].asnumpy()
    bb_predicted = expand_bounding_box(bb_predicted,
                                       expand_bb_scale_x=0.03,
                                       expand_bb_scale_y=0.03)
    predicted_bbs.append(bb_predicted)

    # pixel-coordinate conversion retained from the original visualisation
    # code; the values are not used further (same for the loops below)
    (x, y, w, h) = bb_predicted
    image_h, image_w = image.shape[-2:]
    (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)

    segmented_paragraph_size = (700, 700)
    paragraph_segmented_images = []

    bb = predicted_bbs[0]
    image = crop_handwriting_page(image,
                                  bb,
                                  image_size=segmented_paragraph_size)
    paragraph_segmented_images.append(image)

    min_c = 0.1
    overlap_thres = 0.1
    topk = 600
    predicted_words_bbs_array = []

    for i, paragraph_segmented_image in enumerate(paragraph_segmented_images):
        predicted_bb = predict_bounding_boxes(word_segmentation_net,
                                              paragraph_segmented_image, min_c,
                                              overlap_thres, topk, ctx)

        predicted_words_bbs_array.append(predicted_bb)
        for j in range(predicted_bb.shape[0]):
            (x, y, w, h) = predicted_bb[j]
            image_h, image_w = paragraph_segmented_image.shape[-2:]
            (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)

    line_images_array = []

    for i, paragraph_segmented_image in enumerate(paragraph_segmented_images):
        predicted_bbs = predicted_words_bbs_array[i]
        line_bbs = sort_bbs_line_by_line(predicted_bbs, y_overlap=0.4)
        line_images = crop_line_images(paragraph_segmented_image, line_bbs)
        line_images_array.append(line_images)

        for line_bb in line_bbs:
            (x, y, w, h) = line_bb
            image_h, image_w = paragraph_segmented_image.shape[-2:]
            (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)

    line_image_size = (60, 800)
    character_probs = []
    for line_images in line_images_array:
        form_character_prob = []
        for i, line_image in enumerate(line_images):
            line_image = handwriting_recognition_transform(
                line_image, line_image_size)
            line_character_prob = handwriting_line_recognition_net(
                line_image.as_in_context(ctx))
            form_character_prob.append(line_character_prob)
        character_probs.append(form_character_prob)

    FEATURE_LEN = 150  # unused in this function; kept from the original code
    save_path = os.path.join(folder_path, image_name + '.txt')
    with open(save_path, 'w') as file:
        for i, form_character_probs in enumerate(character_probs):
            for j, line_character_probs in enumerate(form_character_probs):
                decoded_line_bs = get_beam_search(line_character_probs)
                print(decoded_line_bs)
                file.write(decoded_line_bs + ' ')
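
Hypothetical driver for generate_op: iterate over a folder of scanned forms and write one transcript per image. The directory names are placeholders, not from the source.

import os

img_dir = "data/forms"          # placeholder input directory
out_dir = "output/transcripts"  # placeholder output directory
os.makedirs(out_dir, exist_ok=True)
for img_n in sorted(os.listdir(img_dir)):
    if img_n.lower().endswith(('.png', '.jpg', '.jpeg')):
        generate_op(img_n, img_dir, out_dir)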