def recognize():
    """Handle a POST upload of a handwritten-text image and return the
    recognized words as JSON.

    Expects a multipart form with an 'image' file field.  The upload is
    saved under ./images, the page is cropped, word bounding boxes are
    detected, and each box is classified with the trained model.

    Returns:
        Rendered index.html when the upload is missing or empty,
        otherwise a JSON response of the form {"allWords": [...]}.
        (Returns None for non-POST requests, as in the original.)
    """
    if request.method == 'POST':
        if 'image' not in request.files:
            print('No file part')
            return render_template('index.html')
        file = request.files['image']
        if file.filename == '':
            print('No selected file')
            return render_template('index.html')
        if file:
            # BUGFIX(consistency): build the path once with os.path.join
            # and reuse it for both save and read; the original saved via
            # os.path.join but read back via "images/" + filename.
            saved_path = os.path.join('images', file.filename)
            file.save(saved_path)

            # Load the trained character-classification model.
            charClass = Graph(MODEL_LOC)

            # Load the saved image (OpenCV reads BGR; convert to RGB).
            image = cv2.cvtColor(cv2.imread(saved_path), cv2.COLOR_BGR2RGB)

            crop = page.detection(image)
            bBoxes = words.detection(crop)
            cycler = Cycler.Cycler(crop, bBoxes, charClass)

            # NOTE(review): range(len(bBoxes) - 1) skips the last box —
            # looks like an off-by-one, but it is kept to preserve the
            # existing behavior (test_recognize relies on the same bound).
            allWords = [cycler.idxImage(i) for i in range(len(bBoxes) - 1)]

            return jsonify(allWords=allWords)
def process_image(imag):
    """Run the OCR pipeline on the image file at path *imag* and write
    the recognized text to templates/output.html.

    Each detected text line becomes one output line; words are separated
    by single spaces.  The recognized text is also printed to stdout.
    """
    IMG = imag
    plt.rcParams['figure.figsize'] = (15.0, 10.0)

    # Load image (OpenCV reads BGR; convert to RGB for the pipeline).
    image = cv2.cvtColor(cv2.imread(IMG), cv2.COLOR_BGR2RGB)
    implt(image)

    # Crop the page region and detect/sort word bounding boxes.
    crop = page.detection(image)
    implt(crop)
    boxes = words.detection(crop)
    lines = words.sort_words(boxes)
    implt(crop)

    # BUGFIX(efficiency): recognise() was called twice per word (once
    # for the print, once for the file write); each crop is now
    # recognized exactly once.  Mode 'w' already truncates, so the
    # original truncate-then-reopen-in-'a+' dance is unnecessary, and
    # `with` guarantees the handle is closed.
    with open("templates/output.html", 'w') as output_file:
        for line in lines:
            texts = [recognise(crop[y1:y2, x1:x2])
                     for (x1, y1, x2, y2) in line]
            print(" ".join(texts))
            for text_det in texts:
                output_file.write(text_det + " ")
            output_file.write("\n")
def apply_with_source_info(self, im, source_info):
    """Convert *im* from BGR to RGB and return the detected page crop.

    *source_info* is accepted for interface compatibility but unused.
    """
    rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    return page.detection(rgb)
def test_recognize():
    """Smoke-test the OCR pipeline: FILE_LOC must yield non-empty words."""
    charClass = Graph(MODEL_LOC)

    # Load the saved test image (OpenCV reads BGR; convert to RGB).
    image = cv2.cvtColor(cv2.imread(FILE_LOC), cv2.COLOR_BGR2RGB)
    crop = page.detection(image)
    bBoxes = words.detection(crop)
    cycler = Cycler.Cycler(crop, bBoxes, charClass)

    # NOTE(review): the last box is skipped (len - 1) — kept to match
    # the application code in recognize().
    allWords = [cycler.idxImage(i) for i in range(len(bBoxes) - 1)]

    # Idiomatic truthiness check instead of `== False`.
    assert not structEmpty(allWords)
def main():
    """Recognize handwritten text in the image given as argv[1].

    Writes one line of recognized words per detected text line to
    myfile.txt and echoes each prediction to stdout.
    """
    Infilename = sys.argv[1]
    image = cv2.cvtColor(cv2.imread(Infilename), cv2.COLOR_BGR2RGB)

    # Page crop, word detection and line-ordered word boxes.
    crop = page.detection(image)
    boxes = words.detection(crop)
    lines = words.sort_words(boxes)

    # The recognizer operates on grayscale word crops.
    crop = cv2.cvtColor(crop, cv2.COLOR_RGB2GRAY)
    imLines = [[crop[y1:y2, x1:x2] for (x1, y1, x2, y2) in line]
               for line in lines]

    decoderType = DecoderType.WordBeamSearch
    #decoderType = DecoderType.BeamSearch
    #decoderType = DecoderType.BestPath

    # BUGFIX(leak): `with` closes the char-list file; the original
    # open(...).read() left the handle to the garbage collector.
    with open('../model/charList.txt') as charlist:
        model = Model(charlist.read(), decoderType, mustRestore=True)

    recognizedL = []
    print(
        "-------------------Predicted Handwritten Text-------------------------"
    )
    for line in imLines:
        imgs = [preprocess(word, Model.imgSize) for word in line]
        batch = Batch(None, imgs)
        (recognized, probability) = model.inferBatch(batch, True)
        # Every word (including the last) gets a trailing space, matching
        # the original output format exactly.
        l = ""
        for pw in recognized:
            l += pw + ' '
            print(pw, end=" ")
        print()
        recognizedL.append(l + '\n')

    # BUGFIX(leak): output file is now closed via `with` as well.
    with open("myfile.txt", "w") as file1:
        file1.writelines(recognizedL)
def textRecog(infilename):
    """Return the handwritten text recognized in the image *infilename*.

    Words are separated by single spaces; every word (including the
    last) is followed by a trailing space, matching the original output.
    """
    image = cv2.cvtColor(cv2.imread(infilename), cv2.COLOR_BGR2RGB)

    # Page crop, word detection and line-ordered word boxes.
    crop = page.detection(image)
    boxes = words.detection(crop)
    lines = words.sort_words(boxes)

    # The recognizer operates on grayscale word crops.
    crop = cv2.cvtColor(crop, cv2.COLOR_RGB2GRAY)
    imLines = [[crop[y1:y2, x1:x2] for (x1, y1, x2, y2) in line]
               for line in lines]

    #decoderType = DecoderType.WordBeamSearch
    #decoderType = DecoderType.BeamSearch
    decoderType = DecoderType.BestPath

    # BUGFIX(leak): `with` closes the char-list file; the original
    # open(...).read() left the handle to the garbage collector.
    with open('./model/charList.txt') as charlist:
        model = Model(charlist.read(), decoderType, mustRestore=True)

    # Accumulate "word " fragments and join once — avoids the quadratic
    # string += of the original while producing identical output.
    parts = []
    for line in imLines:
        imgs = [preprocess(word, Model.imgSize) for word in line]
        batch = Batch(None, imgs)
        (recognized, probability) = model.inferBatch(batch, True)
        for pw in recognized:
            parts.append(pw + ' ')
    return "".join(parts)
def fun(list1):
    """Segment each image in *list1* into word crops under ./new3 and run
    gsmain inference over all crops.

    For every input path: the page is cropped, word boxes are detected,
    ./new3 is cleared, each word crop is written as new3/<idx>.png plus a
    per-line "<n>space.png" marker, the original page is saved under
    final_output/<random>.jpg, and all crop paths are appended to
    gsmain.FilePaths.fnInfer before gsmain.main() is invoked.

    NOTE(review): reconstructed from a whitespace-mangled source; the
    exact nesting of the trailing statements is inferred — confirm
    against the original notebook.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import tensorflow as tf
    import cv2
    import random
    # Import costume functions, corresponding to notebooks
    from ocr.normalization import imageNorm, letterNorm
    from ocr import page, words
    #from ocr import charSeg
    from ocr.helpers import implt, resize
    from ocr.tfhelpers import Graph
    from ocr.datahelpers import idx2char
    from src import gsmain
    import glob
    import os
    # ### Global Variables
    for x_file in list1:
        # Each iteration rebuilds the TF graph from scratch.
        tf.reset_default_graph()
        # presumably x_file is already a path string (join of a str is a
        # no-op) or a sequence of path fragments — TODO confirm caller.
        str1 = ''.join(x_file)
        # Settings
        IMG = str1  # 1, 2, 3
        # ## Load image
        image = cv2.cvtColor(cv2.imread(IMG), cv2.COLOR_BGR2RGB)
        implt(image)
        # Crop image and get bounding boxes
        crop = page.detection(image)
        implt(crop)
        bBoxes = words.detection(crop)
        lines = words.sort_words(bBoxes)
        # Clear any crops left over from a previous run.
        filelist = glob.glob("./new3/*.png")
        for file in filelist:
            os.remove(file)
        indeximg = 0
        nl = 0
        for line in lines:
            for (x1, y1, x2, y2) in line:
                # One PNG per detected word, numbered in reading order.
                cv2.imwrite("new3/" + str(indeximg) + ".png",
                            crop[y1:y2, x1:x2])
                #implt(cv2.imread("outcheck.png"))
                indeximg = indeximg + 1
                #gsmain.FilePaths.fnInfer = ["outcheck.png"]
                #wordreco = gsmain.main()
                #file.write(wordreco + ' ')
            # NOTE(review): writes a per-line marker using the LAST word's
            # coordinates from the inner loop — looks intentional as a
            # line separator, but verify.
            cv2.imwrite("new3/" + str(nl) + "space.png", crop[y1:y2, x1:x2])
            nl = nl + 1
        #Get all segmented words
        list2 = glob.glob("./new3/*.png")
        # Sort by modification time so crops keep their write order.
        list2.sort(key=os.path.getmtime)
        '''list2=list()
        for ii in range(0,indeximg):
            list2.append("")'''
        #all files which have to be infer are loaded
        rand_num = random.randint(100, 1000)
        cv2.imwrite('final_output/' + str(rand_num) + '.jpg', image)
        # Append (not replace) crop paths onto the shared gsmain config.
        for i in range(0, len(list2)):
            gsmain.FilePaths.fnInfer = gsmain.FilePaths.fnInfer + [list2[i]]
        gsmain.main('final_output/' + str(rand_num))
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
from PIL import Image
import pytesseract
import os

from ocr.helpers import implt, resize
from ocr import page
from ocr import words

# Script: crop the page region out of test/2.jpg, binarize it, and run
# Tesseract OCR on the result.
IMG = '1'  # 1, 2, 3 (unused here; kept for parity with the notebooks)
filename = "test/2.jpg"
save_filename = "test/2_1.jpg"

# Load (OpenCV reads BGR) and convert to RGB for the page detector.
image = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB)
implt(image)
crop = page.detection(image)
implt(crop)

# BUGFIX: `crop` is RGB (converted at load), so grayscale conversion
# must use RGB2GRAY — BGR2GRAY would swap the red/blue channel weights.
# The other pipeline entry points in this project use RGB2GRAY here.
gray = cv2.cvtColor(crop, cv2.COLOR_RGB2GRAY)

# Otsu binarization followed by a median blur to remove salt noise.
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
gray = cv2.medianBlur(gray, 3)
implt(gray)

# Tesseract (via pytesseract) reads from disk, so round-trip the
# binarized image through a temporary file and clean it up afterwards.
cv2.imwrite(save_filename, gray)
text = pytesseract.image_to_string(Image.open(save_filename))
os.remove(save_filename)
print(text)
plt.rcParams['figure.figsize'] = (15.0, 10.0)

# ### Global Variables

IMG = "page09"  # Image name/number

# # Finding the text areas and words

# Load the page image (OpenCV reads BGR; convert to RGB), crop the page
# region, and display the grayscale version.
image = cv2.cvtColor(cv2.imread("../data/pages/%s.jpg" % IMG),
                     cv2.COLOR_BGR2RGB)
image = page.detection(image)
img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
implt(img, 'gray')


def sobel(channel):
    """The Sobel Operator"""
    grad_x = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    grad_y = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    # Gradient magnitude sqrt(x^2 + y^2), clipped into the 8-bit range.
    magnitude = np.hypot(grad_x, grad_y)
    magnitude[magnitude > 255] = 255
    return np.uint8(magnitude)
def crop_image(image):
    """Detect and return the page region of *image*."""
    cropped = page.detection(image)
    return cropped