def segment(file_path):
    """Segment one image into words and save them under ``out/toNN.png/``.

    The output directory is emptied on every call; each detected word is then
    written as ``out/toNN.png/INDEX.png``.

    Args:
        file_path: path of the image to segment (read with ``cv2.imread``).
    """
    out_dir = 'out/toNN.png'

    # Start from a clean directory. It may not exist yet (first run), in
    # which case an unconditional rmtree would raise.
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)
    # makedirs also creates the parent 'out/' when it is missing.
    os.makedirs(out_dir)

    # prepareImg is called without an explicit height, i.e. with its default.
    img = prepareImg(cv2.imread(file_path))

    # kernelSize: size of filter kernel (odd integer)
    # sigma: standard deviation of the Gaussian used for the filter kernel
    # theta: approximate width/height ratio of words
    # minArea: word candidates smaller than this area are ignored
    res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=350)

    for j, (word_box, word_img) in enumerate(res):
        x, y, w, h = word_box
        cv2.imwrite('%s/%d.png' % (out_dir, j), word_img)
        # Bounding box is drawn on the in-memory image only; this function
        # does not save the annotated image.
        cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1)
def main():
    """Segment every file in ``../data/`` into word images.

    For each input file NAME the words are written to ``../out/NAME/`` as
    numbered PNG files, together with ``summary.png`` showing the bounding
    boxes drawn on the prepared image.
    """
    for name in os.listdir('../data/'):
        print('Segmenting words of sample %s' % name)

        # Prepare the image with a target height of 50, then segment it.
        page = prepareImg(cv2.imread('../data/%s' % name), 50)
        words = wordSegmentation(page, kernelSize=25, sigma=11, theta=7, minArea=100)

        target = '../out/%s' % name
        if not os.path.exists(target):
            os.mkdir(target)

        print('Segmented into %d words' % len(words))
        for idx, (box, crop) in enumerate(words):
            left, top, width, height = box
            cv2.imwrite('../out/%s/%d.png' % (name, idx), crop)
            cv2.rectangle(page, (left, top), (left + width, top + height), 0, 1)

        # Annotated overview of all detected words.
        cv2.imwrite('../out/%s/summary.png' % name, page)
def character_Segmentation(path, inputFileName):
    """Segment every image in ``path`` into character images.

    The characters found in the i-th file of ``path`` are written to
    ``MAIN_FOLDER/processed_character_images/inputFileName/i/j.png``. After
    each file the annotated image is shown in a window and the function waits
    for a key press.

    Args:
        path: directory containing the word images to split.
        inputFileName: name used for the per-input output sub-directory.

    Returns:
        The output root ``MAIN_FOLDER/processed_character_images/inputFileName/``.
    """
    charFiles = os.listdir(path)
    print('charfiles = ', charFiles)

    out_root = MAIN_FOLDER + "/processed_character_images/%s" % inputFileName

    for i, f in enumerate(charFiles):
        # os.path.join instead of a hard-coded '\\' so the input path is also
        # valid on non-Windows systems.
        img = prepareImg(cv2.imread(os.path.join(path, f)), 210)

        out_dir = "%s/%s" % (out_root, i)
        os.makedirs(out_dir, exist_ok=True)

        charRes = wordSegmentation(img, kernelSize=25, sigma=1, theta=1, minArea=4000)
        for j, (charBox, charImg) in enumerate(charRes):
            x, y, w, h = charBox
            cv2.imwrite("%s/%d.png" % (out_dir, j), charImg)
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Blocks until a key is pressed in the preview window.
        cv2.imshow('result', img)
        cv2.waitKey(0)

    return out_root + '/'
def segment_main():
    """Split each image found in ``data_/`` into words stored under ``out/``.

    Every input file NAME gets its own directory ``out/NAME/`` holding one
    numbered PNG per word plus ``summary.png`` with the bounding boxes.
    """
    source_names = os.listdir('data_/')
    for sample in source_names:
        # Fixed-height (50) prepared image.
        prepared = prepareImg(cv2.imread('data_/%s' % sample), 50)

        # kernelSize: size of filter kernel (odd integer)
        # sigma: standard deviation of the Gaussian used for the kernel
        # theta: approximate width/height ratio of words
        # minArea: candidates below this area are ignored
        found = wordSegmentation(prepared, kernelSize=25, sigma=11, theta=7, minArea=100)

        sample_dir = 'out/%s' % sample
        if not os.path.exists(sample_dir):
            os.mkdir(sample_dir)

        for number, entry in enumerate(found):
            rect, word_pixels = entry
            rx, ry, rw, rh = rect
            cv2.imwrite('out/%s/%d.png' % (sample, number), word_pixels)
            cv2.rectangle(prepared, (rx, ry), (rx + rw, ry + rh), 0, 1)

        cv2.imwrite('out/%s/summary.png' % sample, prepared)
def singleCharacterSegmentation(path, inputFileName):
    """Split one image into character images and preview the result.

    The image at ``path`` is prepared, enlarged by a factor of 2.5 and
    segmented. Each piece is saved to
    ``MAIN_FOLDER/processed_single_character_images/STEM/j.png`` where STEM is
    the part of ``inputFileName`` before its first dot. The annotated image is
    then shown and the function waits for a key press.

    Returns:
        The output directory path as a string.
    """
    stem = inputFileName.split('.')[0]
    print(path, inputFileName)

    enlarged = cv2.resize(prepareImg(cv2.imread(path), 210), (0, 0), fx=2.5, fy=2.5)
    pieces = wordSegmentation(enlarged, kernelSize=101, sigma=5, theta=2, minArea=4100)

    target_dir = MAIN_FOLDER + "/processed_single_character_images/%s" % (stem)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for idx, (box, crop) in enumerate(pieces):
        print(idx)
        left, top, width, height = box
        destination = MAIN_FOLDER + "/processed_single_character_images/%s/%d.png" % (stem, idx)
        cv2.imwrite(destination, crop)
        cv2.rectangle(enlarged, (left, top), (left + width, top + height), (0, 255, 0), 2)

    # Preview; blocks until a key is pressed.
    cv2.imshow('result', enlarged)
    cv2.waitKey(0)

    return MAIN_FOLDER + '/processed_single_character_images/%s' % (stem)
def word_segment(img_name):
    """Separate a document image into word images.

    Words are written to a directory named after the image without its
    extension (``./STEM/0.png``, ``./STEM/1.png``, ...) together with
    ``summary.png`` showing the bounding boxes. The directory is recreated
    from scratch on every call.

    Args:
        img_name: path of the image to segment.
    """
    print('Segmenting words of sample %s' % img_name)

    # Prepare the image with a target height of 50.
    img = prepareImg(cv2.imread(img_name), 50)

    # kernelSize: size of filter kernel (odd integer)
    # sigma: standard deviation of the Gaussian used for the kernel
    # theta: approximate width/height ratio of words
    # minArea: candidates below this area are ignored
    res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

    # The output directory must not be the image path itself: removing or
    # creating a directory at the path of the input file cannot work. Use the
    # name without extension, and a suffix if that still names a file.
    out_dir = './%s' % os.path.splitext(img_name)[0]
    if os.path.isfile(out_dir):
        out_dir += '_words'
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    print('Segmented into %d words' % len(res))
    for j, (word_box, word_img) in enumerate(res):
        x, y, w, h = word_box
        # One file per word; the format string takes exactly (dir, index).
        cv2.imwrite('%s/%d.png' % (out_dir, j), word_img)
        cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1)

    # Summary image with bounding boxes around the words.
    cv2.imwrite('%s/summary.png' % out_dir, img)
def segment_to_words(file_path):
    """Segment line images into words.

    Args:
        file_path: path of a directory of images, or path of a single image.

    Returns:
        A list with one entry per processed image. Each entry is the result of
        ``wordSegmentation`` for that image (per the original comment: a list
        of tuples of bounding box and word image). Files rejected by
        ``check_file`` contribute no entry.
    """
    if os.path.isdir(file_path):
        directory = file_path
        imgFiles = os.listdir(file_path)
    else:
        # A single file: process just that file instead of iterating over the
        # characters of the path string.
        directory, single_name = os.path.split(file_path)
        imgFiles = [single_name]

    found_words = []
    for i, f in enumerate(imgFiles):
        print("File #", i, " Name: ", f)
        print('Segmenting words of sample %s' % f)

        # Use the same joined path for the check and for reading the image.
        full_path = os.path.join(directory, f)
        if not check_file(full_path):
            continue

        img = prepareImg(cv2.imread(full_path), 50)

        # kernelSize: size of filter kernel (odd integer)
        # sigma: standard deviation of the Gaussian used for the kernel
        # theta: approximate width/height ratio of words
        # minArea: candidates below this area are ignored
        found_words.append(
            wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=200))

    return found_words
def main():
    """Segment every image in the directory given on the command line.

    Usage: ``script IMAGE_DIR``. For the i-th entry of IMAGE_DIR the words are
    written to ``IMAGE_DIR/word{i}/`` as numbered PNG files, together with a
    ``summary.png`` showing the bounding boxes.
    """
    # argv[0] is the script itself; the directory is the first real argument.
    if not sys.argv[1:]:
        sys.exit('usage: %s IMAGE_DIR' % sys.argv[0])
    path = sys.argv[1]

    for i, f in enumerate(os.listdir(path)):
        source = os.path.join(path, f)
        # Skip sub-directories, including word{i} output of earlier runs.
        if not os.path.isfile(source):
            continue

        print('Segmenting words of sample %s' % f)
        img = prepareImg(cv2.imread(source), 50)

        # kernelSize: size of filter kernel (odd integer)
        # sigma: standard deviation of the Gaussian used for the kernel
        # theta: approximate width/height ratio of words
        # minArea: candidates below this area are ignored
        res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

        # One output directory per input image; it has to exist before
        # cv2.imwrite can store anything in it.
        out_dir = os.path.join(path, f'word{i}')
        os.makedirs(out_dir, exist_ok=True)

        print('Segmented into %d words' % len(res))
        for j, (word_box, word_img) in enumerate(res):
            x, y, w, h = word_box
            cv2.imwrite(os.path.join(out_dir, '%d.png' % j), word_img)
            cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1)

        # Stored per image so later images do not overwrite earlier summaries.
        cv2.imwrite(os.path.join(out_dir, 'summary.png'), img)
def LineData(decoderType,dump):
    """Segment line images into words, recognise each word, and dump JSON.

    Every file in ``FilePaths.fnLineData`` is segmented; each word image is
    written temporarily to ``../data/out/STEM/j.png``, passed to ``infer`` and
    deleted again. A ``summary.png`` with bounding boxes is kept per image and
    all recognised words are written to ``../data/out/output.json`` as a
    mapping from file stem to list of words.

    Args:
        decoderType: decoder passed through to ``Model``.
        dump: passed through to ``Model`` as ``dump``.
    """
    imgFiles = os.listdir(FilePaths.fnLineData)
    os.makedirs('../data/out', exist_ok=True)

    with open(FilePaths.fnAccuracy) as accuracy_file:
        print(accuracy_file.read())
    with open(FilePaths.fnCharList) as char_file:
        char_list = char_file.read()

    out_dict = {}
    model = Model(char_list, decoderType, mustRestore=True, dump=dump)

    for fname in imgFiles:
        # splitext copes with names that have no dot or several dots, where
        # indexing the result of split('.') fails or picks the wrong part.
        stem, extension = os.path.splitext(fname)
        out_dict[stem] = []
        if extension.lower() == '.pdf':
            continue

        raw = cv2.imread(FilePaths.fnLineData + fname)
        if raw is None:
            # Not a readable image (e.g. a sub-directory); keep the empty entry.
            continue
        img = prepareImg(raw, 50)

        # kernelSize: size of filter kernel (odd integer)
        # sigma: standard deviation of the Gaussian used for the kernel
        # theta: approximate width/height ratio of words
        # minArea: candidates below this area are ignored
        res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

        sample_dir = '../data/out/%s' % stem
        os.makedirs(sample_dir, exist_ok=True)

        for j, (word_box, word_img) in enumerate(res):
            x, y, w, h = word_box
            word_path = '../data/out/%s/%d.png' % (stem, j)
            cv2.imwrite(word_path, word_img)
            # infer() is given a file path, hence the temporary file.
            FilePaths.fnInfer = word_path
            result, prob = infer(model, FilePaths.fnInfer)
            out_dict[stem].append(result)
            os.remove(word_path)
            cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1)

        # Summary image with bounding boxes around the words.
        cv2.imwrite('../data/out/%s/summary.png' % stem, img)

    json_object = json.dumps(out_dict, indent=4)
    with open('../data/out/output.json', "w") as outfile:
        outfile.write(json_object)
def main():
    """Split the images in ``../data/MainInputToSplit/`` into word images.

    Words are written to ``../data/SplitOutputs/`` as ``0.png``, ``1.png``...
    The numbering restarts for every input file, so words of a later input
    overwrite same-numbered words of an earlier one.
    """
    for name in os.listdir('../data/MainInputToSplit/'):
        page = prepareImg(cv2.imread('../data/MainInputToSplit/%s' % name), 50)
        detections = wordSegmentation(page, kernelSize=25, sigma=11, theta=7, minArea=100)
        for idx, (box, crop) in enumerate(detections):
            left, top, width, height = box
            cv2.imwrite('../data/SplitOutputs/%d.png' % idx, crop)
            # Drawn on the in-memory image only; it is not saved here.
            cv2.rectangle(page, (left, top), (left + width, top + height), 0, 1)
def main():
    """Recognise the sentence in the image named by ``sys.argv[1]``.

    The image is contrast-stretched and eroded, segmented into words, the
    words are stored in the directory ``path`` (a name defined outside this
    function), recognised in one batch, printed, and written to ``myfile.txt``.
    """
    Infilename = sys.argv[1]
    img = cv2.imread(Infilename, cv2.IMREAD_GRAYSCALE)
    if img is None:
        sys.exit('could not read image: %s' % Infilename)

    # Increase contrast. A uniform image has no range to stretch; skip the
    # normalisation there instead of dividing by zero.
    pxmin = np.min(img)
    pxmax = np.max(img)
    if pxmax != pxmin:
        imgContrast = (img - pxmin) / (pxmax - pxmin) * 255
    else:
        imgContrast = img.astype(np.float64)

    # Increase line width.
    kernel = np.ones((3, 3), np.uint8)
    imgMorph = cv2.erode(imgContrast, kernel, iterations=1)

    img = prepareImg(imgMorph, 50)
    res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

    if os.path.isdir(path):
        shutil.rmtree(path)
    os.mkdir(path)

    # Remember the files in segmentation order. Sorting the directory listing
    # would order names as strings ('10.png' before '2.png') and scramble the
    # sentence once there are more than ten words.
    files = []
    for j, (word_box, word_img) in enumerate(res):
        word_path = os.path.join(path, '%d.png' % j)
        cv2.imwrite(word_path, word_img)
        files.append(word_path)

    decoderType = DecoderType.WordBeamSearch
    with open('../model/charList.txt') as char_file:
        char_list = char_file.read()
    # Built once; a second instance after inference would never be used.
    model = Model(char_list, decoderType, mustRestore=True)

    imgs = [preprocess(cv2.imread(fp, cv2.IMREAD_GRAYSCALE), Model.imgSize)
            for fp in files]
    batch = Batch(None, imgs)
    (recognized, probability) = model.inferBatch(batch, True)

    # Each word is followed by a single space, as in the printed output.
    sentence = ''.join(pw + ' ' for pw in recognized)
    print("The predicted sentence is : '%s'" % sentence)
    with open("myfile.txt", "w") as file1:
        file1.write(sentence + '\n')
def segment():
    """Segment the images in ``../data/input/`` into consecutively numbered words.

    Word images are appended to ``../data/input_segmented/`` as ``N.png``,
    continuing after the highest number already present in that directory.

    Returns:
        A list with the number of words found in each input image, in the
        order returned by ``os.listdir``.
    """
    word_counts = []
    for name in os.listdir('../data/input/'):
        page = prepareImg(cv2.imread('../data/input/%s' % name), 50)

        # kernelSize: size of filter kernel (odd integer)
        # sigma: standard deviation of the Gaussian used for the kernel
        # theta: approximate width/height ratio of words
        # minArea: candidates below this area are ignored
        detections = wordSegmentation(page, kernelSize=25, sigma=11, theta=7, minArea=100)
        word_counts.append(len(detections))

        # Find the highest index already used. File names are expected to be
        # NUMBER plus a four-character extension.
        if os.path.exists('../data/input_segmented'):
            existing = os.listdir('../data/input_segmented')
            if len(existing) != 0:
                highest = max([int(entry[:-4]) for entry in existing])
            else:
                highest = 0
        else:
            os.mkdir('../data/input_segmented')
            highest = 0

        for position, detection in enumerate(detections, start=1):
            _box, crop = detection
            cv2.imwrite('../data/input_segmented/%d.png' % (highest + position), crop)

    return word_counts
def segmentation_wraper(img_path):
    """Return the word images found in the image at ``img_path``.

    Args:
        img_path: path of the image to segment.

    Returns:
        A tuple of word images in the order produced by ``wordSegmentation``;
        an empty tuple when no word was found.
    """
    # yike: should revise a little if the input is already cv2 images
    img = prepareImg(cv2.imread(img_path), 50)
    res = wordSegmentation(args, img, kernelSize=25, sigma=11, theta=7, minArea=200)

    # zip(*[]) yields nothing, so the two-name unpacking below would raise
    # ValueError on an image without any detected word.
    if len(res) == 0:
        return ()

    _boxes, images = zip(*res)
    return images
def foo(input_folder):
    """Segment every image in ``input_folder`` into word images.

    Output goes to ``detected_words/REST/NAME/`` where REST is the part of
    ``input_folder`` after its first ``/`` and NAME is the input file name.
    Each directory holds one numbered PNG per word plus ``summary.png``.

    Args:
        input_folder: directory containing the images to segment.
    """
    imgFiles = os.listdir(input_folder)

    # The output root only depends on input_folder; compute it once.
    words_out_folder = "detected_words"
    path = "{}/{}".format(words_out_folder,
                          input_folder[input_folder.find("/") + 1:])

    for f in imgFiles:
        print('Segmenting words of sample %s' % f)
        img = prepareImg(cv2.imread(input_folder + '/%s' % f), 50)

        # kernelSize: size of filter kernel (odd integer)
        # sigma: standard deviation of the Gaussian used for the kernel
        # theta: approximate width/height ratio of words
        # minArea: candidates below this area are ignored
        res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

        print(path)
        # makedirs creates every missing level, so nested input folders
        # (several '/' in input_folder) work as well.
        sample_dir = path + '/%s' % f
        os.makedirs(sample_dir, exist_ok=True)

        print('Segmented into %d words' % len(res))
        for j, (word_box, word_img) in enumerate(res):
            x, y, w, h = word_box
            cv2.imwrite(path + '/%s/%d.png' % (f, j), word_img)
            cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1)

        # Summary image with bounding boxes around the words.
        cv2.imwrite(path + '/%s/summary.png' % f, img)
def doWords(img, model):
    """Recognise the words in ``img`` and return them as one string.

    The image is prepared, binarised with an adaptive Gaussian threshold and
    shown in a window (waits for a key press) before segmentation. Every
    recognised word is preceded by a single space in the returned string.

    Args:
        img: image to process, passed to ``prepareImg``.
        model: model handed to ``infer`` for each word image.

    Returns:
        The concatenation of ``" " + infer(model, word)`` over all words.
    """
    prepared = prepareImg(img, 50)
    binary = cv2.adaptiveThreshold(
        prepared, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 11)

    # Debug preview; blocks until a key is pressed.
    cv2.imshow("1", binary)
    cv2.waitKey(0)

    detections = wordSegmentation(binary, kernelSize=25, sigma=11, theta=7, minArea=100)
    print(len(detections))

    pieces = []
    for _box, crop in detections:
        pieces.append(" " + infer(model, crop))
    return "".join(pieces)
def test_infer(files, file_path, out_file):
    """Run the line model on every PNG/JPG in ``files`` and log the output.

    For each matching file a space followed by the predicted line text is
    written to ``out_file``. The word segmentation of each image is stored in
    a temporary ``tmp`` directory, which is removed again afterwards.

    Args:
        files: iterable of file names.
        file_path: prefix prepended to each name (no separator is added).
        out_file: path of the text file to write.
    """
    # The with-block guarantees the output is flushed and closed.
    with open(out_file, 'w') as file_out:
        for name in files:
            if '.png' not in name and '.jpg' not in name:
                continue

            file_out.write(' ')
            test_img = file_path + name
            img = prepareImg(cv2.imread(test_img), 64)
            img2 = img.copy()
            res = wordSegmentation(img, kernelSize=9, sigma=11, theta=7, minArea=100)

            if not os.path.exists('tmp'):
                os.mkdir('tmp')
            for j, (word_box, word_img) in enumerate(res):
                x, y, w, h = word_box
                cv2.imwrite('tmp/%d.png' % j, word_img)
                # Bounding box on the copy; the copy is not saved here.
                cv2.rectangle(img2, (x, y), (x + w, y + h), (0, 255, 0), 1)

            # l_model_predict is a name defined outside this function.
            line_text = predict_image(l_model_predict, test_img, False)
            print('[Line model]: ' + line_text)
            file_out.write(line_text)
            plt.show()
            shutil.rmtree('tmp')
def detect_word_model(model_predict, test_img):
    """Recognise a text line word by word.

    The image is segmented into words, each word is stored temporarily in
    ``tmp/``, predicted with ``predict_image`` and the predictions are passed
    through ``correction_list``.

    Args:
        model_predict: model handed to ``predict_image``.
        test_img: path of the line image.

    Returns:
        The corrected words joined by single spaces.
    """
    img = prepareImg(cv2.imread(test_img), 64)
    res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

    if not os.path.exists('tmp'):
        os.mkdir('tmp')
    try:
        # Keep the paths in segmentation order. Sorting the directory listing
        # would put '10.png' before '2.png' and would also pick up stale
        # files left in a pre-existing tmp directory.
        word_paths = []
        for j, (_box, word_img) in enumerate(res):
            word_path = 'tmp/%d.png' % j
            cv2.imwrite(word_path, word_img)
            word_paths.append(word_path)

        pred_line = [predict_image(model_predict, p, True) for p in word_paths]
    finally:
        # Remove the temporary directory even when prediction fails.
        shutil.rmtree('tmp')

    pred_line = correction_list(pred_line)
    return ' '.join(pred_line)
def main():
    """Segment the images in ``../d/`` and run the recogniser for each word.

    For every detected word the recognised text and its probability are
    printed. Nothing is written to disk.
    """
    fnCharList = '../model/charList.txt'
    input_dir = '../d/'

    imgFiles = os.listdir(input_dir)
    with open(fnCharList) as char_file:
        char_list = char_file.read()
    model = Model(char_list, DecoderType.BestPath, mustRestore=True)
    # Unused below; kept because side effects of the constructor cannot be
    # ruled out from this function alone.
    file = FilePaths()

    for f in imgFiles:
        print('Segmenting words of sample %s' % f)
        # Read from the directory that was listed; the names in imgFiles come
        # from there and need not exist anywhere else.
        img = prepareImg(cv2.imread(os.path.join(input_dir, f)), 50)

        # kernelSize: size of filter kernel (odd integer)
        # sigma: standard deviation of the Gaussian used for the kernel
        # theta: approximate width/height ratio of words
        # minArea: candidates below this area are ignored
        res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

        print('Segmented into %d words' % len(res))
        for j, (word_box, word_img) in enumerate(res):
            x, y, w, h = word_box
            # NOTE(review): the whole line image is passed to the model for
            # every word, not word_img, and it already carries the boxes drawn
            # in earlier iterations. Presumably the word image (after the
            # model's usual preprocessing) was intended - confirm.
            batch = Batch(None, [img])
            (recognized, probability) = model.inferBatch(batch, True)
            print('Recognized:', '"' + recognized[0] + '"')
            print('Probability:', probability[0])
            cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1)
def main():
    """Segment the images in ``../data/`` and store padded, binarised words.

    Each word image is padded by 4 pixels of value 255 on every side,
    thresholded at 90 and saved to ``../out/NAME/j.png``; the mean pixel value
    of the result is printed. ``../out/NAME/summary.png`` shows the bounding
    boxes on the prepared image.
    """
    for sample in os.listdir('../data/'):
        print('Segmenting words of sample %s' % sample)
        page = prepareImg(cv2.imread('../data/%s' % sample), 50)

        # kernelSize: size of filter kernel (odd integer)
        # sigma: standard deviation of the Gaussian used for the kernel
        # theta: approximate width/height ratio of words
        # minArea: candidates below this area are ignored
        detections = wordSegmentation(page, kernelSize=3, sigma=5, theta=2, minArea=140)

        sample_dir = '../out/%s' % sample
        if not os.path.exists(sample_dir):
            os.mkdir(sample_dir)

        print('Segmented into %d words' % len(detections))
        for idx, (box, crop) in enumerate(detections):
            left, top, width, height = box
            padded = np.pad(crop, ((4, 4), (4, 4)), "constant", constant_values=(255))
            _ret, binary = cv2.threshold(padded, 90, 255, cv2.THRESH_BINARY)
            print(np.mean(binary))
            cv2.imwrite('../out/%s/%d.png' % (sample, idx), binary)
            cv2.rectangle(page, (left, top), (left + width, top + height), 0, 1)

        cv2.imwrite('../out/%s/summary.png' % sample, page)
def main(): """reads images from data/ and outputs the word-segmentation to out/""" # read input images from 'in' directory imgFiles = os.listdir('D:/SimpleHTR/input/') for (i, f) in enumerate(imgFiles): print('Segmenting words of sample %s' % f) # read image, prepare it by resizing it to fixed height and converting it to grayscale img = prepareImg(cv2.imread('D:/SimpleHTR/input/%s' % f), 50) # execute segmentation with given parameters # -kernelSize: size of filter kernel (odd integer) # -sigma: standard deviation of Gaussian function used for filter kernel # -theta: approximated width/height ratio of words, filter function is distorted by this factor # - minArea: ignore word candidates smaller than specified area res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100) # write output to 'out/inputFileName' directory '''if not os.path.exists('D:/SimpleHTR/out/%s'%f): os.mkdir('D:/SimpleHTR/out/%s'%f)''' # iterate over all segmented words print('Segmented into %d words' % len(res)) for (j, w) in enumerate(res): (wordBox, wordImg) = w (x, y, w, h) = wordBox cv2.imwrite('D:/SimpleHTR/data/test.png', wordImg) # save word cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1) # draw bounding box in summary image os.path.join(os.path.dirname('D:/SimpleHTR/src/main.py')) tf.compat.v1.reset_default_graph() exec(open('main.py').read()) # output summary image with bounding boxes around words cv2.imwrite('D:/SimpleHTR/data/summary.png', img) apex = open("D:/SimpleHTR/data/output.txt", "a") apex.write("\n") apex.close()
def predict(w_model_predict, l_model_predict, test_img):
    """Recognise a line image with both the word model and the line model.

    Args:
        w_model_predict: model used on the individual word images.
        l_model_predict: model used on the whole line image.
        test_img: path of the line image.

    Returns:
        A tuple ``(text1, text2)``: the corrected word-model predictions
        joined by spaces, and the line-model prediction.
    """
    img = prepareImg(cv2.imread(test_img), 64)
    res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

    if not os.path.exists('tmp'):
        os.mkdir('tmp')
    try:
        # Keep the paths in segmentation order. Sorting the directory listing
        # would put '10.png' before '2.png' and would also pick up stale
        # files left in a pre-existing tmp directory.
        word_paths = []
        for j, (_box, word_img) in enumerate(res):
            word_path = 'tmp/%d.png' % j
            cv2.imwrite(word_path, word_img)
            word_paths.append(word_path)

        text = [predict_image(w_model_predict, p, is_word=True)
                for p in word_paths]
    finally:
        # Remove the temporary directory even when prediction fails.
        shutil.rmtree('tmp')

    text1 = ' '.join(correction_list(text))
    text2 = predict_image(l_model_predict, test_img, is_word=False)
    return text1, text2
def word_Segmentation(path, inputFileName):
    """Split one image into word images and preview the result.

    The image at ``path`` is prepared, enlarged by a factor of 2.5 and
    segmented. Words are saved to ``MAIN_FOLDER/processed_images/STEM/j.png``
    where STEM is the part of ``inputFileName`` before its first dot. The
    annotated image is then shown and the function waits for a key press.

    Args:
        path: path of the image to segment.
        inputFileName: file name used to derive the output directory name.

    Returns:
        The output directory path as a string.
    """
    stem = inputFileName.split('.')[0]

    img = prepareImg(cv2.imread(path), 60)
    img = cv2.resize(img, (0, 0), fx=2.5, fy=2.5)
    wordRes = wordSegmentation(img, kernelSize=27, sigma=13, theta=7, minArea=250)

    # One separator style for creating, writing and returning the directory.
    # Mixing '\\' and '/' only names the same directory on Windows; elsewhere
    # the files would be written to a directory that was never created.
    out_dir = MAIN_FOLDER + "/processed_images/%s" % stem
    os.makedirs(out_dir, exist_ok=True)

    for j, (word_box, word_img) in enumerate(wordRes):
        x, y, w, h = word_box
        cv2.imwrite("%s/%d.png" % (out_dir, j), word_img)
        cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1)

    # Preview; blocks until a key is pressed.
    cv2.imshow('result', img)
    cv2.waitKey(0)
    return out_dir
def main():
    """Split the line images in ``../linestowords/`` into word images.

    The words of input file NAME are written to
    ``../../SimpleHTR/data/NAME/j.png``. The index and name of every input
    file are printed before it is processed.
    """
    for position, sample in enumerate(os.listdir('../linestowords/')):
        print('i = %d' % position)
        print('f = %s' % sample)
        print('Segmenting words of sample %s' % sample)

        line_img = prepareImg(cv2.imread('../linestowords/%s' % sample), 50)
        detections = wordSegmentation(line_img, kernelSize=25, sigma=11, theta=7, minArea=100)

        destination = '../../SimpleHTR/data/%s' % sample
        if not os.path.exists(destination):
            os.mkdir(destination)

        print('Segmented into %d words' % len(detections))
        for idx, (box, crop) in enumerate(detections):
            left, top, width, height = box
            cv2.imwrite('../../SimpleHTR/data/%s/%d.png' % (sample, idx), crop)
            # Drawn on the in-memory image only; it is not saved here.
            cv2.rectangle(line_img, (left, top), (left + width, top + height), 0, 1)
def main():
    """Recognise the sentence in the image named by ``sys.argv[1]``.

    The image is segmented into words, the words are stored temporarily in
    the directory ``path`` (a name defined outside this function), each word
    is passed to ``infer``, and the predicted sentence and the average
    probability (in percent) are printed.
    """
    Infilename = sys.argv[1]
    img = prepareImg(cv2.imread(Infilename), 50)
    res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

    # A directory left over from an aborted run would make mkdir fail.
    if os.path.isdir(path):
        shutil.rmtree(path)
    os.mkdir(path)
    try:
        # Remember the files in segmentation order. Sorting the directory
        # listing would order names as strings ('10.png' before '2.png').
        files = []
        for j, (_box, word_img) in enumerate(res):
            word_path = os.path.join(path, '%d.png' % j)
            cv2.imwrite(word_path, word_img)
            files.append(word_path)

        with open('../model/charList.txt') as char_file:
            char_list = char_file.read()
        model = Model(char_list, DecoderType.WordBeamSearch, mustRestore=True)

        predicted = []
        probability = []
        for fp in files:
            pred, prob = infer(model, fp)
            predicted.append(pred)
            probability.append(prob)
    finally:
        # Remove the temporary word images even when inference fails.
        shutil.rmtree(path)

    print('The predicted sentence is : ', end="'")
    for pw in predicted:
        print(pw, end=" ")
    print("'")

    # No words means no probabilities; report 0.0 instead of dividing by zero.
    if probability:
        average = sum(probability) / len(probability) * 100
    else:
        average = 0.0
    print('The average probability is : ', end="")
    print(average)
def segment():
    """Segment ``data/in.jpg`` into words stored in ``segmented/``.

    All files matching ``segmented/*`` and ``contrast/*`` are deleted first.
    Word images are written as ``segmented/j.png`` and the image with the
    bounding boxes as ``summary/summary.png``.
    """
    page = prepareImg(cv2.imread("data/in.jpg"), 50)

    # kernelSize: size of filter kernel (odd integer)
    # sigma: standard deviation of the Gaussian used for the kernel
    # theta: approximate width/height ratio of words
    # minArea: candidates below this area are ignored
    detections = wordSegmentation(page, kernelSize=25, sigma=11, theta=7, minArea=100)

    # Clear the results of a previous run.
    for pattern in ('segmented/*', 'contrast/*'):
        for stale in glob.glob(pattern):
            os.remove(stale)

    print('Segmented into %d words' % len(detections))
    for idx, (box, crop) in enumerate(detections):
        left, top, width, height = box
        cv2.imwrite('segmented/%d.png' % (idx), crop)
        cv2.rectangle(page, (left, top), (left + width, top + height), 0, 1)

    cv2.imwrite('summary/summary.png', page)
def segmentation_wraper(img_path, word=False):
    """Return the word images of an image, or the resized image itself.

    Args:
        img_path: path of the image to process.
        word: when true, the input is treated as a single word: the prepared
            image is resized to ``args.imgsize`` and transposed, and the
            segmentation result is not used.

    Returns:
        With ``word`` false: a tuple of word images in the order produced by
        ``wordSegmentation`` (empty when nothing was found). With ``word``
        true: a one-element list holding the resized, transposed image.
    """
    # yike: should revise a little if the input is already cv2 images
    img = prepareImg(cv2.imread(img_path), 50)
    res = wordSegmentation(args, img, kernelSize=25, sigma=11, theta=7, minArea=200)

    if word:
        resized = cv2.resize(img, (args.imgsize[0], args.imgsize[1]),
                             interpolation=cv2.INTER_CUBIC)
        return [cv2.transpose(resized)]

    # zip(*[]) yields nothing, so the two-name unpacking below would raise
    # ValueError on an image without any detected word.
    if len(res) == 0:
        return ()

    _boxes, images = zip(*res)
    return images
def main():
    """Capture a document image, segment it into words and recognise them.

    Command line options:
        --image_name: name of the captured image (default ``test_img.png``).
        --camera_number: camera index passed to ``Camera`` (default 0).

    Word images and ``summary.png`` are written to ``../out/IMAGE_NAME/``; the
    recognised words are printed as a list in segmentation order.
    """
    parser = argparse.ArgumentParser()
    # make sure the image name ends in an image file extension
    parser.add_argument('--image_name', type=str, default='test_img.png')
    parser.add_argument('--camera_number', type=int, default=0)
    args = parser.parse_args()
    img_name = args.image_name
    camera_number = args.camera_number
    decoderType = DecoderType.BestPath

    # capture image of document
    cam = Camera(camera_number, img_name)
    got_img = cam.capture()
    if not got_img:
        return

    # prepare image for word segmentation
    processed_img_name = increase_contrast(img_name)

    print('Segmenting words of sample %s' % img_name)
    img = prepareImg(cv2.imread('../data/%s' % processed_img_name), 50)

    # kernelSize: size of filter kernel (odd integer)
    # sigma: standard deviation of the Gaussian used for the kernel
    # theta: approximate width/height ratio of words
    # minArea: candidates below this area are ignored
    res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)

    # Recreate the output directory; makedirs also creates '../out' itself.
    out_dir = '../out/%s' % img_name
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    print('Segmented into %d words' % len(res))
    word_paths = []
    for j, (word_box, word_img) in enumerate(res):
        x, y, w, h = word_box
        word_path = os.path.join(out_dir, '%d.png' % j)
        cv2.imwrite(word_path, word_img)
        word_paths.append(word_path)
        cv2.rectangle(img, (x, y), (x + w, y + h), 0, 1)

    # output summary image with bounding boxes around words
    cv2.imwrite('../out/%s/summary.png' % img_name, img)

    # analyze words
    with open(FilePaths.fnAccuracy) as accuracy_file:
        print(accuracy_file.read())
    with open(FilePaths.fnCharList) as char_file:
        char_list = char_file.read()
    model = Model(char_list, decoderType, mustRestore=True, dump=False)

    # Use the recorded paths rather than a directory listing: os.listdir
    # returns entries in arbitrary order, which would shuffle the text.
    text = [infer(model, word_path) for word_path in word_paths]
    print(f'Document Text: {text}')
def main():
    """Train, validate, or run word-level inference, depending on the CLI flags.

    Flags:
        --train: train the NN on the data loaded from ``FilePaths.fnTrain``.
        --validate: validate a restored NN on the same loader.
        --beamsearch / --wordbeamsearch: select the decoder (best path is the
            default; --beamsearch wins when both are given).
        --dump: forwarded to ``Model`` in inference mode.

    Without --train/--validate, '../data/testline.png' is segmented into
    words, each word image is passed to ``infer``, and the recognized texts
    and their accuracies are printed as two lists.
    """
    # optional command line args
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', help='train the NN', action='store_true')
    parser.add_argument('--validate', help='validate the NN', action='store_true')
    parser.add_argument('--beamsearch', help='use beam search instead of best path decoding',
                        action='store_true')
    parser.add_argument('--wordbeamsearch', help='use word beam search instead of best path decoding',
                        action='store_true')
    parser.add_argument('--dump', help='dump output of NN to CSV file(s)', action='store_true')
    args = parser.parse_args()

    decoderType = DecoderType.BestPath
    if args.beamsearch:
        decoderType = DecoderType.BeamSearch
    elif args.wordbeamsearch:
        decoderType = DecoderType.WordBeamSearch

    # train or validate on IAM dataset
    if args.train or args.validate:
        # load training data, create TF model
        loader = DataLoader(FilePaths.fnTrain, Model.batchSize, Model.imgSize, Model.maxTextLen)

        # save characters of model for inference mode; the context manager
        # guarantees the data is flushed and the handle closed
        with open(FilePaths.fnCharList, 'w') as char_list_file:
            char_list_file.write(''.join(loader.charList))

        # save words contained in dataset into file
        with open(FilePaths.fnCorpus, 'w') as corpus_file:
            corpus_file.write(' '.join(loader.trainWords + loader.validationWords))

        # execute training or validation
        if args.train:
            model = Model(loader.charList, decoderType)
            train(model, loader)
        elif args.validate:
            model = Model(loader.charList, decoderType, mustRestore=True)
            validate(model, loader)

    # infer text on test image
    else:
        img = prepareImg(cv2.imread('../data/testline.png'), 50)
        res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100)
        stringList = []
        accuracyList = []

        with open(FilePaths.fnCharList) as char_list_file:
            char_list = char_list_file.read()
        model = Model(char_list, decoderType, mustRestore=True, dump=args.dump)

        # the bounding box of each entry is not needed here, only the word image
        for _wordBox, wordImg in res:
            accuracy, text = infer(model, wordImg)
            stringList.append(text)
            accuracyList.append(accuracy)

        # commands for clearing screen
        print(chr(27) + '[2j')
        print('\033c')
        print('\x1bc')
        print(stringList)
        print(accuracyList)
def infer(model, fnImg):
    """Segment a fixed test image into words, recognize and spell-check each
    word, and display the summary image with the flagged letter regions recoloured.

    For every entry ``f`` of ``../data2/`` the image ``../data2/03.png`` is
    segmented, the word crops and a ``summary.png`` are written to
    ``../out/<f>/``, each crop is run through ``model.inferBatch`` and the
    result is compared with ``spell.correction``. Nothing is returned.

    model: object providing ``inferBatch(batch, True)``.
    fnImg: not used anywhere in the body.

    NOTE(review): this block reached review with its indentation collapsed;
    the nesting below is reconstructed from the data flow and should be
    confirmed against the original file.
    """
    imgFiles = os.listdir('../data2/')
    for (i, f) in enumerate(imgFiles):
        # NOTE(review): the same hard-coded image is read on every iteration;
        # `f` only selects the output directory -- confirm this is intended.
        imagelign = prepareImg(cv2.imread("../data2/03.png"), 89)
        res = wordSegmentation(imagelign, kernelSize=25, sigma=7, theta=11, minArea=100)
        print('Segmented into %d words' % len(res))
        # os.mkdir does not create a missing '../out' parent directory
        if not os.path.exists('../out/%s' % f):
            os.mkdir('../out/%s' % f)
        print('Segmented into %d words' % len(res))
        # print the bounding box of every segmented word
        for ij in res:
            print(ij[0])
        for (j, w) in enumerate(res):
            (wordBox, wordImg) = w
            # rebinds `w` from the (box, image) pair to the box width
            (x, y, w, h) = wordBox
            cv2.imwrite('../out/%s/%d.png' % (f, j), wordImg)  # save word
            cv2.rectangle(imagelign, (x, y), (x + w, y + h), 0, 1)  # draw bounding box in summary image
        # output summary image with bounding boxes around words
        cv2.imwrite('../out/%s/summary.png' % f, imagelign)
        # reload the summary as an RGB PIL image so single pixels can be recoloured
        imagepilres = Image.open('../out/%s/summary.png' % f)
        imagepilres = imagepilres.convert('RGB')
        pixelsres = imagepilres.load()  # create the pixel map
        imagepilres.show()
        # second pass over the words; the first two statements repeat the
        # write/rectangle work already done by the loop above
        for (j, w) in enumerate(res):
            (wordBox, wordImg) = w
            (x, y, w, h) = wordBox
            cv2.imwrite('../out/%s/%d.png' % (f, j), wordImg)  # save word
            cv2.rectangle(imagelign, (x, y), (x + w, y + h), 0, 1)  # draw bounding box in summary image
            #### integration of word spelling
            imagepil = Image.open('../out/%s/%d.png' % (f, j))
            imgrgb = cv2.imread('../out/%s/%d.png' % (f, j))  # never read afterwards
            #(thresh, blackAndWhiteImage) = cv2.threshold(imgrgb, 127, 255, cv2.THRESH_BINARY)
            img = preprocess(
                cv2.imread('../out/%s/%d.png' % (f, j), cv2.IMREAD_GRAYSCALE),
                Model.imgSize)
            batch = Batch(None, [img])
            (recognized, probability) = model.inferBatch(batch, True)
            print('Recognized:', '"' + recognized[0] + '"')
            print('Probability:', probability[0])
            # `spell` is defined outside this block -- presumably a spell-checker
            # instance; TODO confirm what correction() returns when it has no candidate
            misspelled = [recognized[0]]
            misspelled = spell.unknown(misspelled)
            print('correction:', spell.correction(recognized[0]))
            for wordd in misspelled:
                print(wordd, spell.correction(wordd))
            # Case 1: the correction is longer than the recognized text
            # (message below: "spelling error with missing letter").
            if (len(spell.correction(recognized[0])) > len(recognized[0])):
                print("erreur d'orthographe avec manque de lettre")
                #imagepil.show()
                imagepil = imagepil.convert('RGB')
                #imagepil.show()
                indexerrorspell = []
                print(spell.correction(recognized[0]))
                print(len(spell.correction(recognized[0])))
                print((recognized[0]))
                print(len((recognized[0])))
                print(imagepil.size[0])
                jj = 0
                ind = 0  # never read afterwards
                # collect positions where the recognized letter differs from the correction
                for lettre in (recognized[0]):
                    print(lettre)
                    #print(recognized[0][jj])
                    print("")
                    if (lettre != spell.correction(recognized[0])[jj]):
                        indexerrorspell.append(jj)
                    jj = jj + 1
                # width in pixels of one letter slice: the crop width is split
                # evenly across the recognized characters
                nbslices = imagepil.size[0] // len((recognized[0]))
                print(nbslices)
                print(indexerrorspell)
                pixels = imagepil.load()  # create the pixel map
                # the loops below reuse the names `i` and `j` of the enclosing loops
                # recolour every pixel whose first channel is below 100 to (255, 0, 255)
                for i in range(nbslices * 0, nbslices * (len(recognized[0]))):  # for every col:
                    for j in range(imagepil.size[1]):  # For every row
                        if (pixels[i, j][0] < 100):
                            pixels[i, j] = (255, 0, 255)
                # NOTE(review): after the pass above no pixel in these columns still
                # has a first channel below 100, so this red pass looks like it can
                # never match -- confirm the intended order.
                for indspell in indexerrorspell:
                    for i in range(
                            nbslices * indspell, nbslices *
                            (indspell + 1)):  #(imagepil.size[0]): # for every col:
                        for j in range(imagepil.size[1]):  # For every row
                            if (pixels[i, j][0] < 100):
                                pixels[i, j] = (
                                    255, 0, 0
                                )  #pixels[i,j] = (i, j, 200) # set the colour accordingly
                #imagepil.show()
                # copy the recoloured crop into the word's box in the summary image;
                # the source indices wrap modulo w and h
                # (assumes the crop is w x h pixels -- TODO confirm)
                ires2 = 0
                jres2 = 0
                for ires in range(x, x + w):
                    for jres in range(y, y + h):
                        pixelsres[ires, jres] = pixels[ires2, jres2]
                        jres2 = (jres2 + 1) % h
                        #print(jres2)
                    ires2 = (ires2 + 1) % w
            # Case 2: the correction is shorter than the recognized text
            # (message below: "spelling error with extra letters").
            if (len(spell.correction(recognized[0])) < len(recognized[0])):
                print("erreur d'orthographe avec des lettres supplimentaires")
                #imagepil.show()
                imagepil = imagepil.convert('RGB')
                #imagepil.show()
                indexerrorspell = []
                indexcharsupp = []  # never read afterwards
                print(spell.correction(recognized[0]))
                print(len(spell.correction(recognized[0])))
                print((recognized[0]))
                print(len((recognized[0])))
                print(imagepil.size[0])
                jj = 0
                # compare letter by letter over the length of the (shorter) correction
                for lettre in spell.correction(recognized[0]):
                    print(lettre)
                    print(recognized[0][jj])
                    print("")
                    if (lettre != recognized[0][jj]):
                        indexerrorspell.append(jj)
                    jj = jj + 1
                # number of recognized characters beyond the correction's length
                suppletter = len(recognized[0]) - len(
                    spell.correction(recognized[0]))
                nbslices = imagepil.size[0] // len((recognized[0]))
                print(nbslices)
                print(indexerrorspell)
                pixels = imagepil.load()  # create the pixel map
                # mismatching letter slices: first channel below 100 -> (255, 0, 0)
                for indspell in indexerrorspell:
                    for i in range(
                            nbslices * indspell, nbslices *
                            (indspell + 1)):  #(imagepil.size[0]): # for every col:
                        for j in range(imagepil.size[1]):  # For every row
                            if (pixels[i, j][0] < 100):
                                pixels[i, j] = (
                                    255, 0, 0
                                )  #pixels[i,j] = (i, j, 200) # set the colour accordingly
                # trailing `suppletter` slices: first channel below 100 -> (0, 255, 255)
                for i in range(nbslices * (len(recognized[0]) - suppletter),
                               nbslices * (len(recognized[0]))):  # for every col:
                    for j in range(imagepil.size[1]):  # For every row
                        if (pixels[i, j][0] < 100):
                            pixels[i, j] = (0, 255, 255)
                #imagepil.show()
                # copy the recoloured crop back into the summary image (same wrap-around copy as case 1)
                ires2 = 0
                jres2 = 0
                for ires in range(x, x + w):
                    for jres in range(y, y + h):
                        pixelsres[ires, jres] = pixels[ires2, jres2]
                        jres2 = (jres2 + 1) % h
                        #print(jres2)
                    ires2 = (ires2 + 1) % w
            # Case 3: same length. As reconstructed, this branch also runs when the
            # correction equals the recognized text; indexerrorspell then stays empty.
            if (len(spell.correction(recognized[0])) == len(recognized[0])):
                print("erreur d'orthographe")
                #imagepil.show()
                imagepil = imagepil.convert('RGB')
                #imagepil.show()
                indexerrorspell = []
                print(spell.correction(recognized[0]))
                print(len(spell.correction(recognized[0])))
                print((recognized[0]))
                print(len((recognized[0])))
                print(imagepil.size[0])
                jj = 0
                ind = 0  # never read afterwards
                for lettre in spell.correction(recognized[0]):
                    print(lettre)
                    print(recognized[0][jj])
                    print("")
                    if (lettre != recognized[0][jj]):
                        indexerrorspell.append(jj)
                    jj = jj + 1
                # NOTE(review): raises ZeroDivisionError if recognized[0] is empty
                nbslices = imagepil.size[0] // len((recognized[0]))
                print(nbslices)
                print(indexerrorspell)
                pixels = imagepil.load()  # create the pixel map
                # mismatching letter slices: first channel below 100 -> (255, 0, 0)
                for indspell in indexerrorspell:
                    for i in range(
                            nbslices * indspell, nbslices *
                            (indspell + 1)):  #(imagepil.size[0]): # for every col:
                        for j in range(imagepil.size[1]):  # For every row
                            if (pixels[i, j][0] < 100):
                                pixels[i, j] = (
                                    255, 0, 0
                                )  #pixels[i,j] = (i, j, 200) # set the colour accordingly
                # copy the crop back into the summary image (same wrap-around copy as case 1)
                ires2 = 0
                jres2 = 0
                for ires in range(x, x + w):
                    for jres in range(y, y + h):
                        pixelsres[ires, jres] = pixels[ires2, jres2]
                        jres2 = (jres2 + 1) % h
                        #print(jres2)
                    ires2 = (ires2 + 1) % w
                #imagepil.show()
        # show the annotated summary; it is not written back to disk
        imagepilres.show()
        # NOTE(review): the triple quote at the end of the next line has no visible
        # opening counterpart in this block; it looks like residue of a
        # commented-out region -- confirm before removing.
        #cv2.imwrite('../out/%s/summary.png'%f, imagepilres) '''
def line_segment(filepath, filenames, model):
    """Recognize the words of every image in *filenames*, line by line.

    Each image is split into text lines with ``LineSegmentation`` (falling
    back to the Otsu-binarized whole image when no line is found), each line
    is split into words with ``wordSegmentation``, and each word crop is
    eroded, written to a temporary PNG, passed to ``infer`` and removed again.
    One ``summary<n>.png`` per line is written to ``../data/out/<name>/``.

    When ``../data/true_text/truth.json`` exists, every result is compared
    with ``truth[<name>][<word index>]`` and the character error rate and word
    accuracy are printed at the end.

    Args:
        filepath: directory containing the images.
        filenames: file names inside *filepath*; PDF files are skipped.
        model: model object forwarded to ``infer``.

    Returns:
        dict mapping each processed file name (the text before its first dot)
        to the list of recognized words; a word that could not be inferred is
        stored as ''.

    Raises:
        KeyError / IndexError: if truth.json exists but lacks an entry for a
            processed file or word.
    """
    out_dict = {}
    out_path = '../data/out/'
    truth_path = '../data/true_text/'
    compare = False
    numCharErr = 0
    numCharTotal = 0
    numWordOK = 0
    numWordTotal = 0
    truth_file = {}
    if os.path.exists(truth_path + 'truth.json'):
        compare = True
        with open(truth_path + 'truth.json', 'r') as truth:
            truth_file = json.load(truth)

    # structuring element used to thicken strokes; identical for every word
    kernel = np.ones((2, 2), np.uint8)

    for filename in filenames:
        fullpath = os.path.join(filepath, filename)
        # the key stays "text before the first dot" so existing truth.json
        # files keep matching
        f = filename.split('.')[0]
        # splitext copes with names that have no dot or several dots
        ext = os.path.splitext(filename)[1].lstrip('.').lower()
        if ext == 'pdf':
            continue
        out_dict[f] = []
        print('Reading image "' + filename + '"..')
        im = cv2.imread(fullpath)
        output_path = out_path + f
        # also creates '../data/out' itself when it is missing
        os.makedirs(output_path, exist_ok=True)
        line_segmentation = LineSegmentation(img=im, output_path=output_path)
        lines = line_segmentation.segment()
        if len(lines) == 0:
            # no line found: treat the binarized page as a single line
            im = cv2.imread(fullpath, 0)
            _, imbw = cv2.threshold(im, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            lines = [imbw]
        n_line = 1
        n_word = 0
        for line in lines:
            img = prepareImg(line, 50)
            # execute segmentation with given parameters
            # -kernelSize: size of filter kernel (odd integer)
            # -sigma: standard deviation of Gaussian function used for filter kernel
            # -theta: approximated width/height ratio of words, filter function is distorted by this factor
            # -minArea: ignore word candidates smaller than specified area
            res = wordSegmentation(img, kernelSize=25, sigma=11, theta=7, minArea=100,
                                   increase_dim=10)

            # iterate over all segmented words
            for j, (wordBox, wordImg) in enumerate(res):
                x, y, box_w, box_h = wordBox
                imgloc = output_path + '/%d.png' % j
                # increase contrast
                # preprocess so that it is similar to IAM dataset
                wordImg = cv2.erode(wordImg, kernel, iterations=1)
                cv2.imwrite(imgloc, wordImg)  # save word
                try:
                    result, _ = infer(model, imgloc)
                except Exception:
                    # best effort: keep going with an empty result, but let
                    # KeyboardInterrupt / SystemExit propagate
                    print("Couldn't infer: image%d" % j)
                    result = ""

                # compare with ground truth
                if compare:
                    expected = truth_file[f][n_word]
                    numWordOK += 1 if expected == result else 0
                    numWordTotal += 1
                    dist = editdistance.eval(result, expected)
                    numCharErr += dist
                    numCharTotal += len(expected)
                    print('[OK]' if dist == 0 else '[ERR:%d]' % dist,
                          '"' + expected + '"', '->', '"' + result + '"')

                # updating output dictionary
                out_dict[f].append(result)
                n_word += 1
                # deleting intermediate file
                os.remove(imgloc)
                cv2.rectangle(img, (x, y), (x + box_w, y + box_h), 0, 1)  # draw bounding box in summary image

            # output summary image with bounding boxes around words
            cv2.imwrite(output_path + '/summary%d.png' % n_line, img)
            n_line += 1

    # report only when at least one word was compared; this avoids dividing by zero
    if compare and numWordTotal:
        charErrorRate = numCharErr / numCharTotal if numCharTotal else 0.0
        wordAccuracy = numWordOK / numWordTotal
        print('Character error rate: %f%%. Word accuracy: %f%%.' %
              (charErrorRate * 100.0, wordAccuracy * 100.0))
    return out_dict