def run_main(fl):
    '''Run the full OCR pipeline on one pecha image file.

    Parameters:
        fl: path to the image file; opened, converted to grayscale and
            binarized to a 0/1 float array before recognition.

    Returns the result of main() (char-position data), or [] if
    recognition fails for any reason (best-effort batch behavior).
    '''
    try:
        return main(np.asarray(Image.open(fl).convert('L'))/255,
                    conf=Config(break_width=2.5, recognizer='hmm',
                                segmenter='stochastic', page_type='pecha',
                                line_break_method='line_cluster'),
                    page_info={'flname': fl, 'volume': VOL})
    # FIX: was a bare ``except:``, which also swallowed KeyboardInterrupt
    # and SystemExit, making a batch run impossible to abort with Ctrl-C.
    except Exception:
        return []
def viterbi_post_process(self, img_arr, results):
    '''Go through all results and attempts to correct invalid syllables.

    Walks each recognized line, accumulating consecutive characters into
    syllables (split on tsek/shad/space or any char outside
    word_parts_set). Any syllable that is non-standard and not in the
    known-syllable list is re-recognized: its bounding region is cropped
    from img_arr, written to a temporary TIFF and run through a fresh
    PageRecognizer in viterbi-postprocessing mode. If that re-run yields
    a result, the whole syllable is replaced by a single combined box
    carrying [x, y, w, h, prob, text]; otherwise the original per-char
    boxes are kept unchanged.

    Parameters:
        img_arr: 2-D page image array (0/1 floats) the boxes index into.
        results: list of lines; each line is a list of char entries
            shaped [x, y, w, h, ..., label] (label last).

    Returns a new list-of-lines in the same format, with corrected
    syllables collapsed into single entries.
    '''
    final = [[] for i in range(len(results))]
    for i, line in enumerate(results):
        syllable = []   # chars accumulated for the current syllable
        for j, char in enumerate(line):
            # Syllable boundary: a separator char, a char with no
            # word-part component, or simply the end of the line.
            if char[-1] in u'་། ' or not word_parts_set.intersection(char[-1]) or j == len(line)-1:
                if syllable:
                    syl_str = ''.join(s[-1] for s in syllable)
                    if is_non_std(syl_str) and syl_str not in syllables:
                        print syl_str, 'HAS PROBLEMS. TRYING TO FIX'
                        # Combined bounding box over all chars of the syllable.
                        bx = combine_many_boxes([ch[0:4] for ch in syllable])
                        bx = list(bx)
                        arr = img_arr[bx[1]:bx[1]+bx[3], bx[0]:bx[0]+bx[2]]
                        arr = fadd_padding(arr, 3)
                        try:
                            # Round-trip through a temp TIFF so a fresh
                            # PageRecognizer can re-recognize just this crop.
                            temp_dir = tempfile.mkdtemp()
                            tmpimg = os.path.join(temp_dir, 'tmp.tif')
                            Image.fromarray(arr*255).convert('L').save(tmpimg)
                            pgrec = PageRecognizer(tmpimg, Config(line_break_method='line_cut', page_type='book', postprocess=False, viterbi_postprocessing=True, clear_hr=False, detect_o=False))
                            prob, hmm_res = pgrec.recognize_page()
                            os.remove(tmpimg)
                            os.removedirs(temp_dir)
                        except TypeError:
                            # recognize_page returned a non-unpackable value;
                            # treat as "no correction found".
                            print 'HMM run exited with an error.'
                            prob = 0
                            hmm_res = ''
                        logging.info(u'VPP Correction: %s\t%s' % (syl_str, hmm_res))
                        if prob == 0 and hmm_res == '':
                            # Re-run failed: keep the original chars as-is.
                            print 'hit problem. using unmodified output'
                            for s in syllable:
                                final[i].append(s)
                        else:
                            # Replace the syllable with one combined entry.
                            bx.append(prob)
                            bx.append(hmm_res)
                            final[i].append(bx)
                    else:
                        # Valid syllable: pass its chars through unchanged.
                        for s in syllable:
                            final[i].append(s)
                final[i].append(char)
                syllable = []
            else:
                syllable.append(char)
        # Flush any trailing syllable left at end of line.
        if syllable:
            for s in syllable:
                final[i].append(s)
    return final
def __init__(self):
    '''Initialize the app frame: show release notes once, then load config.'''
    # if 'new.txt' exists, show it in a dialog
    if os.path.exists('new.txt'):
        # FIX: the original `open('new.txt').read()` never closed the
        # file handle; use a context manager.
        with open('new.txt') as f:
            msg = f.read()
        new_dlg = WhatsNew(self, msg)
        # NOTE(review): the notes file is deleted on ID_CANCEL (dialog
        # dismissed), so it is shown again until the user cancels —
        # confirm this is the intended "show once" semantics.
        if new_dlg.ShowModal() == wx.ID_CANCEL:
            os.remove('new.txt')
        else:
            pass
    configfile = self.get_configfile()
    self.config = Config(configfile)
    self.init_project()
def run_main(fl, conf=None, text=False):
    '''Helper function to do recognition.

    Parameters:
        fl: path to the page image file.
        conf: optional Config; when omitted a default book-style
            line_cut configuration is used.
        text: if True, main() returns plain text instead of
            char-position data.
    '''
    if not conf:
        # FIX: this branch previously contained only commented-out
        # Config(...) candidates, so conf stayed None and was passed
        # straight into main(). Use the line_cut variant (the last,
        # non-pecha candidate in the original comments) as the default.
        conf = Config(segmenter='stochastic', recognizer='hmm',
                      break_width=2.5, line_break_method='line_cut',
                      postprocess=False, low_ink=False,
                      stop_line_cut=False, clear_hr=True, detect_o=False)
    return main(np.asarray(Image.open(fl).convert('L'))/255, conf=conf,
                page_info={'flname': os.path.basename(fl), 'volume': VOL},
                text=text)
def viterbi_post_process(img_arr, results):
    '''Go through all results and attempts to correct invalid syllables.

    Module-level variant of the viterbi post-processor: chars are grouped
    into syllables (split on tsek/shad/space, chars without word-part
    components, or end of line); each non-standard, unknown syllable is
    cropped from img_arr and re-recognized by a recursive main() call in
    viterbi_postprocess mode. Successful corrections replace the
    syllable's chars with one combined [x, y, w, h, prob, text] entry.

    Parameters:
        img_arr: 2-D page image array the char boxes index into.
        results: list of lines, each a list of entries shaped
            [x, y, w, h, ..., label] (label last).

    Returns a new list-of-lines in the same format.
    '''
    final = [[] for i in range(len(results))]
    for i, line in enumerate(results):
        syllable = []   # chars accumulated for the current syllable
        for j, char in enumerate(line):
            # Syllable boundary: separator char, non-word-part char,
            # or end of line.
            if char[-1] in u'་། ' or not word_parts.intersection(char[-1]) or j == len(line)-1:
                if syllable:
                    syl_str = ''.join(s[-1] for s in syllable)
                    if is_non_std(syl_str) and syl_str not in syllables:
                        print syl_str, 'HAS PROBLEMS. TRYING TO FIX'
                        # Combined bounding box over the whole syllable.
                        bx = combine_many_boxes([ch[0:4] for ch in syllable])
                        bx = list(bx)
                        arr = img_arr[bx[1]:bx[1]+bx[3], bx[0]:bx[0]+bx[2]]
                        arr = fadd_padding(arr, 3)
                        try:
                            # Re-recognize just this crop via a nested
                            # main() call in viterbi_postprocess mode.
                            prob, hmm_res = main(arr, Config(line_break_method='line_cut', page_type='book', postprocess=False, viterbi_postprocess=True, clear_hr=False), page_info={'flname':''})
                        except TypeError:
                            # main() returned a non-unpackable value;
                            # treat as "no correction found".
                            print 'HMM run exited with an error.'
                            prob = 0
                            hmm_res = ''
                        # corrections[syl_str].append(hmm_res)
                        logging.info(u'VPP Correction: %s\t%s' % (syl_str, hmm_res))
                        if prob == 0 and hmm_res == '':
                            # Re-run failed: keep original chars.
                            print 'hit problem. using unmodified output'
                            for s in syllable:
                                final[i].append(s)
                        else:
                            bx.append(prob)
                            bx.append(hmm_res)
                            final[i].append(bx)
                    else:
                        # Valid syllable: pass chars through unchanged.
                        for s in syllable:
                            final[i].append(s)
                final[i].append(char)
                syllable = []
            else:
                syllable.append(char)
        # Flush any trailing syllable at end of line.
        if syllable:
            for s in syllable:
                final[i].append(s)
    return final
def main():
    '''Resolve (or restore) downloads/documents paths and list download files.'''
    conf = Config()
    if not conf.is_set("downloads_path", "documents_path"):
        ## First run: discover and persist the useful paths.
        downloads_path, documents_path = get_downloads_and_documents_path()
        conf.add(("downloads_path", downloads_path), ("documents_path", documents_path))
        conf.set_config()
    else:
        ## Subsequent runs: restore the useful paths from config.
        downloads_path, documents_path = conf.get("downloads_path", "documents_path")
    if check_download_and_documents_path(downloads_path, documents_path):
        # Python 2 idiom; under Python 3 this would be next(os.walk('.')).
        # NOTE(review): filenames come from walking '.' but are joined
        # onto downloads_path — looks like this should walk
        # downloads_path instead; confirm before changing.
        root, dirs, files = os.walk('.').next()
        for f in files:
            full_path = os.path.join(downloads_path, f)
            print(os.path.splitext(full_path))
    else:
        raise Exception("Download or Documents directory not found")
def main():
    """Walk the configured input tree and archive every image/video file.

    Loads config.ini, ensures the archive directory exists, opens the
    database, processes each supported file under config.inputDir, and
    closes the database when done. Per-file failures are logged and do
    not stop the walk.
    """
    config = Config.loadConfig('config.ini')
    ensureArchiveDir(config.archiveDir)
    manager = DatabaseManager.createManager(config.archiveDir)
    manager.connectDB()
    for dirpath, _unused_dirs, filenames in os.walk(config.inputDir):
        for entry in filenames:
            try:
                filePath = os.path.join(dirpath, entry)
                # Normalized extension: no surrounding whitespace, no dot.
                extension = os.path.splitext(filePath)[1].strip().strip('.')
                if len(extension) == 0:
                    log.warning(f'skip unknown file: {filePath}')
                    continue
                supported = file_utils.isImage(extension) or file_utils.isVideo(
                    extension)
                if supported:
                    processFile(manager, config.archiveDir, filePath)
                else:
                    log.warning(f'skip unsupported file: {filePath}')
            except Exception as e:
                # Keep going on any single-file failure.
                log.failure(f'Unexpected error: {e}')
    manager.closeDB()
def run_recognize(imagepath):
    """Recognize one page image, merging CLI arguments over the config.

    Configuration comes from --conf (if given) or the module default;
    any config key that also appears as a truthy command-line argument
    is overridden by the CLI value.
    """
    global args
    command_args = args
    if command_args.conf:
        conf_dict = load_config(command_args.conf)
    else:
        conf_dict = default_config
    # Command-line values take precedence over the loaded configuration.
    for key in conf_dict:
        if hasattr(command_args, key):
            val = getattr(command_args, key)
            if val:
                conf_dict[key] = val
    rec = PageRecognizer(imagepath, conf=Config(**conf_dict))
    text = args.format == 'text'
    return rec.recognize_page(text=text)
def main(page_array, conf=Config(viterbi_postprocess=False, line_break_method = None, page_type = None), retries=0, text=False, page_info={}): '''Main procedure for processing a page from start to finish Parameters: -------------------- page_array: a 2 dimensional numpy array containing binary pixel data of the image page_info: dictionary, optional A dictionary containing metadata about the page to be recognized. Define strings for the keywords "flname" and "volume" if saving a serialized copy of the OCR results. retries: Used internally when system attempts to reboot a failed attempt text: boolean flag. If true, return text rather than char-position data Returns: -------------- text: str Recognized text for entire page if text=False, return character position and label data as a python dictionary ''' print page_info.get('flname','') confpath = conf.path conf = conf.conf line_break_method = conf['line_break_method'] page_type = conf['page_type'] ### Set the line_break method automatically if it hasn't been ### specified beforehand if not line_break_method and not page_type: if page_array.shape[1] > 2*page_array.shape[0]: print 'setting page type as pecha' line_break_method = 'line_cluster' page_type = 'pecha' else: print 'setting page type as book' line_break_method = 'line_cut' page_type = 'book' conf['page_type'] = page_type conf['line_break_method'] = line_break_method detect_o = conf.get('detect_o', False) print 'clear hr', conf.get('clear_hr', False) results = [] out = u'' try: ### Get information about the pages shapes = PE2(page_array, cls, page_type=page_type, low_ink=conf['low_ink'], flpath=page_info.get('flname',''), detect_o=detect_o, clear_hr = conf.get('clear_hr', False)) shapes.conf = conf ### Separate the lines on a page if page_type == 'pecha': k_groups = shapes.num_lines shapes.viterbi_post = conf['viterbi_postprocess'] if line_break_method == 'line_cut': line_info = LineCut(shapes) if not line_info: # immediately skip to re-run with LineCluster 
sys.exit() elif line_break_method == 'line_cluster': line_info = LineCluster(shapes, k=k_groups) ### Perform segmentation of characters segmentation = Segmenter(line_info) ###Perform recognition if not conf['viterbi_postprocess']: if conf['recognizer'] == 'probout': results = recognize_chars_probout(segmentation) elif conf['recognizer'] == 'hmm': results = recognize_chars_hmm(segmentation, trans_p, start_p) elif conf['recognizer'] == 'kama': results = recognize_chars_probout(segmentation) results = recognize_chars_kama(results, segmentation) if conf['postprocess']: results = viterbi_post_process(segmentation.line_info.shapes.img_arr, results) else: # Should only be call from *within* a non viterbi run... prob, results = hmm_recognize_bigram(segmentation) return prob, results ### Construct an output string output = [] for n, line in enumerate(results): for m,k in enumerate(line): # if isinstance(k[-1], int): # print n,m,k # page_array[k[1]:k[1]+k[3], k[0]:k[0]+k[2]] = 0 # Image.fromarray(page_array*255).show() output.append(k[-1]) output.append(u'\n') out = ''.join(output) print out if text: results = out return results except: ### Retry and assume the error was cause by use of the ### wrong line_break_method... import traceback;traceback.print_exc() if not results and not conf['viterbi_postprocess']: print 'WARNING', '*'*40 print page_info['flname'], 'failed to return a result.' print 'WARNING', '*'*40 print if line_break_method == 'line_cut' and retries < 1: print 'retrying with line_cluster instead of line_cut' try: return main(page_array, conf=Config(path=confpath, line_break_method='line_cluster', page_type='pecha'), page_info=page_info, retries = 1, text=text) except: logging.info('Exited after failure of second run.') return [] if not conf['viterbi_postprocess']: if not results: logging.info('***** No OCR output for %s *****' % page_info['flname']) return results
                page_info={'flname':fl, 'volume': VOL})
    except:
        return []
# NOTE(review): the lines above are the tail of a run_main() definition
# whose opening lines fall outside this chunk -- confirm against the
# full file before editing them.

# Timed batch driver: OCR every file in `fls` (defined elsewhere) and
# append the recognized text to a single UTF-8 output file.
import datetime
start = datetime.datetime.now()
print 'starting'
outfile = codecs.open('/home/zr/latest-ocr-outfile.txt', 'w', 'utf-8')
for fl in fls:
    #### line cut
    # ret = main((np.asarray(Image.open(fl).convert('L'))/255),
    #            conf=Config(break_width=2., recognizer='probout',
    #            segmenter='stochastic', line_break_method='line_cut',
    #            postprocess=False, stop_line_cut=False, low_ink=False, clear_hr=True),
    #            page_info={'flname':fl, 'volume': VOL}, text=True)

    #### line cluster
    ret = main((np.asarray(Image.open(fl).convert('L'))/255),
               conf=Config(segmenter='stochastic', recognizer='hmm',
                           break_width=2.0, page_type='pecha',
                           line_break_method='line_cluster',
                           line_cluster_pos='center', postprocess=False,
                           detect_o=False, low_ink=False, clear_hr=True),
               page_info={'flname':fl, 'volume': VOL}, text=True)
    outfile.write(ret)
    outfile.write('\n\n')
print datetime.datetime.now() - start, 'time taken'
def run_recognize_remote(imagepath, conf_dict, text=False):
    """Recognize one page image from a plain config dict (remote entry point).

    Builds a Config from conf_dict, runs a PageRecognizer over imagepath
    and returns its results (text if text=True, char-position data
    otherwise).
    """
    recognizer = PageRecognizer(imagepath, conf=Config(**conf_dict))
    return recognizer.recognize_page(text=text)
def recognize_page(self, text=False): try: self.get_page_elements() self.extract_lines() except: import traceback;traceback.print_exc() self.results = [] return self.results self.generate_segmentation() conf = self.conf results = [] try: if not conf['viterbi_postprocessing']: if conf['recognizer'] == 'probout': results = recognize_chars_probout(self.segmentation) elif conf['recognizer'] == 'hmm': results = recognize_chars_hmm(self.segmentation) if conf['postprocess']: # print 'running viterbi post processing as next iter' results = self.viterbi_post_process(self.page_array, results) else: # Should only be call from *within* a non viterbi run... # print 'Debug: Running within viterbi post proc' prob, results = hmm_recognize_bigram(self.segmentation) return prob, results output = [] for n, line in enumerate(results): for m,k in enumerate(line): if isinstance(k[-1], int): print n,m,k self.page_array[k[1]:k[1]+k[3], k[0]:k[0]+k[2]] = 0 Image.fromarray(self.page_array*255).show() output.append(k[-1]) output.append(u'\n') out = ''.join(output) print out if text: results = out self.results = results return results except: import traceback;traceback.print_exc() if not results and not conf['viterbi_postprocessing']: print 'WARNING', '*'*40 print self.page_info['flname'], 'failed to return a result.' print 'WARNING', '*'*40 print if self.line_break_method == 'line_cut' and self.retries < 1: print 'retrying with line_cluster instead of line_cut' try: pr = PageRecognizer(self.imagefile, Config(path=self.confpath, line_break_method='line_cluster', page_type='pecha'), page_info=self.page_info, retries = 1, text=text) return pr.recognize_page() except: logging.info('Exited after failure of second run.') return [] if not conf['viterbi_postprocessing']: if not results: logging.info('***** No OCR output for %s *****' % self.page_info['flname']) if text: results = out self.results = results return results
def main():
    '''SWT text-detection pipeline: load config, optionally deskew, then
    detect letters and words and emit debug images / ground-truth boxes.
    '''
    # Command line: one positional argument naming a JSON config file.
    parser = argparse.ArgumentParser()
    parser.add_argument("conf", help="JSON configuration file", type=str)
    args = parser.parse_args()

    # Load configuration file
    try:
        with open(args.conf, "r") as f:
            config = Config(**json.load(f))
    except Exception as e:
        print(e)
        return

    # if save is enabled create the output folder if it does not exist
    if config.save:
        if not os.path.isdir(config.output):
            os.mkdir(config.output)

    # read the input image
    img = cv2.imread(config.input)
    # check if it is a valid image
    if img is None:
        print(ERR_INPUT_NOT_EXISTS)
        return

    # convert the image to gray-scale (3-channel input only)
    gray_img = img.copy()
    if len(img.shape) == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray_img = gray_img

    # get edges map: Otsu threshold on the un-blurred gray image drives
    # the Canny hysteresis thresholds on the blurred one.
    gray_img_blur = cv2.GaussianBlur(gray_img, (5, 5), 0)
    #thresh, _ = cv2.threshold(gray_img_blur, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    thresh, _ = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    edges = cv2.Canny(gray_img_blur, thresh / 2, thresh, apertureSize=3)

    skew = 0.
    # NOTE(review): block boundaries below were reconstructed from a
    # whitespace-mangled source -- confirm the extent of this `if` block
    # against the original file.
    if config.deskew:
        # apply Stroke Width Transform (first pass, used only for deskew)
        swt = swt_transform(
            gray_img,
            edges,
            psi=np.pi / 2,
            dark_on_light=config.dark_on_light,
            _save=config.save,
            _save_path=config.output,
        )
        # apply segmentation on swt and get layers
        _, layers = swt_segmentation(swt, gray_img)
        # filter layers into letter candidates
        letters = swt_extract_letters(
            gray_img,
            layers,
            min_width=config.letters.min_width,
            min_height=config.letters.min_height,
            max_width=config.letters.max_width,
            max_height=config.letters.max_height,
            width_height_ratio=config.letters.width_height_ratio,
            height_width_ratio=config.letters.height_width_ratio,
            min_diag_mswt_ratio=config.letters.min_diag_mswt_ratio,
            max_diag_mswt_ratio=config.letters.max_diag_mswt_ratio,
        )
        letters_image = create_letters_edge_image(gray_img, edges, letters)
        if config.save:
            cv2.imwrite("{}/pre_swt.jpg".format(config.output), swt)
            cv2.imwrite("{}/pre_layers_conn.jpg".format(config.output),
                        draw_strokes_connections(img, layers))
            cv2.imwrite("{}/pre_layers.jpg".format(config.output),
                        draw_strokes_contours(img, layers))
            cv2.imwrite("{}/pre_letters.jpg".format(config.output),
                        draw_strokes_contours(img, letters))
            cv2.imwrite("{}/pre_letters_conmn.jpg".format(config.output),
                        draw_strokes_connections(img, letters))
            cv2.imwrite("{}/deskew_points.jpg".format(config.output),
                        letters_image)
        # Rotate the image to correct the estimated skew.
        img, skew = skew_correction(img, letters_image, threshold=None,
                                    _save=config.save,
                                    _save_path=config.output)

    # Recompute gray image and edge map (img may have been rotated above).
    gray_img = img.copy()
    if len(img.shape) == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray_img = gray_img
    gray_img_blur = cv2.GaussianBlur(gray_img, (5, 5), 0)
    #thresh, _ = cv2.threshold(gray_img_blur, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    thresh, _ = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    edges = cv2.Canny(gray_img_blur, thresh / 2, thresh, apertureSize=3)

    # apply Stroke Width Transform (main pass)
    swt = swt_transform(
        gray_img,
        edges,
        psi=np.pi / 2,
        dark_on_light=config.dark_on_light,
        skip_edges=config.swt_skip_edges,
        _save=config.save,
        _save_path=config.output,
    )
    #swt[ gray_img>np.mean(gray_img) ] = np.Infinity
    # apply segmentation on swt and get layers
    _, layers = swt_segmentation(
        swt,
        gray_img,
        skip_edges=config.swt_skip_edges,
    )
    # filter layers into letter candidates
    letters = swt_extract_letters(
        gray_img,
        layers,
        min_width=config.letters.min_width,
        min_height=config.letters.min_height,
        max_width=config.letters.max_width,
        max_height=config.letters.max_height,
        width_height_ratio=config.letters.width_height_ratio,
        height_width_ratio=config.letters.height_width_ratio,
        min_diag_mswt_ratio=config.letters.min_diag_mswt_ratio,
        max_diag_mswt_ratio=config.letters.max_diag_mswt_ratio,
    )
    # union letters into words
    words_h, _ = swt_extract_words(letters,
                                   thresh_pairs_y=config.words.thresh_pairs_y,
                                   thresh_mswt=config.words.thresh_mswt,
                                   thresh_height=config.words.thresh_height,
                                   width_scale=config.words.width_scale,
                                   _save=config.save,
                                   _save_path=config.output)
    words_h_strokes = get_strokes_from_words(words_h, swt, gray_img)

    if config.save and config.gt:
        # Draw detected words, rotate the visualization back to the
        # original (pre-deskew) orientation, and write ground-truth boxes
        # compensated for the skew.
        aaa = draw_strokes_centers(
            draw_strokes_contours(
                draw_strokes_connections(img, words_h_strokes),
                words_h_strokes), words_h_strokes)
        matrix = cv2.getRotationMatrix2D((img.shape[1] / 2, img.shape[0] / 2),
                                         -np.rad2deg(skew), 1)
        dst = cv2.warpAffine(aaa, matrix, (img.shape[1], img.shape[0]),
                             cv2.INTER_NEAREST)
        cv2.imwrite("{}/words_all_original.jpg".format(config.output), dst)
        _txt = ""
        _words = words_h_strokes
        _label = 0
        for _w in _words:
            # Rotate each word center back by `skew` around the image
            # center to find its displacement in the un-rotated image.
            cx = ((float)(img.shape[1])) / 2
            cy = ((float)(img.shape[0])) / 2
            wcx = (float)(_w.center[1])
            wcy = (float)(_w.center[0])
            _corr_x = cx + (wcx - cx) * np.cos(skew) - (
                wcy - cy) * np.sin(skew) - wcx
            _corr_y = cy + (wcx - cx) * np.sin(skew) + (
                wcy - cy) * np.cos(skew) - wcy
            # print("word center x",wcx)
            # print("word center y",wcy)
            # print("displacement x",_corr_x)
            # print("displacement y",_corr_y)
            # print("minx:{} miny:{} maxx:{} maxy:{}".format(_w.min_x,_w.min_y,_w.max_x,_w.max_y))
            # add o.nico to regards!!!!!!!!
            if skew < -np.pi / 4:
                # Near-vertical skew: width/height are swapped in the record.
                _txt += "{} {} {} {} {} {} {} \n".format(
                    _label, 0,
                    int((wcx + _corr_x) - abs(((_w.min_y - wcy)))),
                    int((wcy + _corr_y) - abs(((_w.min_x - wcx)))),
                    int(_w.height), int(_w.width), np.pi / 2 + skew)
            else:
                _txt += "{} {} {} {} {} {} {} \n".format(
                    _label, 0,
                    int(_w.min_x + _corr_x),
                    int(_w.min_y + _corr_y),
                    int(_w.width), int(_w.height), skew)
            _label += 1
        with open('{}/gt.gt'.format(config.output), 'w') as file:
            file.write(_txt)

    #####
    if config.save:
        # save swt data
        cv2.imwrite("{}/swt.jpg".format(config.output), swt)
        # save layers data: box, connection
        cv2.imwrite("{}/layers.jpg".format(config.output),
                    draw_strokes_contours(img, layers))
        cv2.imwrite("{}/layers_connection.jpg".format(config.output),
                    draw_strokes_connections(img, layers))
        cv2.imwrite(
            "{}/layers_all.jpg".format(config.output),
            draw_strokes_contours(draw_strokes_connections(img, layers),
                                  layers))
        # save letters data: box, connection, center
        cv2.imwrite("{}/letters.jpg".format(config.output),
                    draw_strokes_contours(img, letters))
        cv2.imwrite("{}/letters_connection.jpg".format(config.output),
                    draw_strokes_connections(img, letters))
        cv2.imwrite("{}/letters_center.jpg".format(config.output),
                    draw_strokes_centers(img, letters))
        cv2.imwrite(
            "{}/letters_all.jpg".format(config.output),
            draw_strokes_centers(
                draw_strokes_contours(draw_strokes_connections(img, letters),
                                      letters), letters))
        # save words data: box, connection, center
        cv2.imwrite("{}/words_box.jpg".format(config.output),
                    draw_strokes_contours(img, words_h_strokes))
        cv2.imwrite("{}/words_connection.jpg".format(config.output),
                    draw_strokes_connections(img, words_h_strokes))
        cv2.imwrite("{}/words_center.jpg".format(config.output),
                    draw_strokes_centers(img, words_h_strokes))
        cv2.imwrite(
            "{}/words_all.jpg".format(config.output),
            draw_strokes_centers(
                draw_strokes_contours(
                    draw_strokes_connections(img, words_h_strokes),
                    words_h_strokes), words_h_strokes))
from config_manager import Config ## Init pre-defined environments: required_envs = ["required_test_env"] optional_envs = { "optional_env_test1": "1", "optional_env_test2": "2" } ## Load Env file to pars configurations. Configs = Config(required_envs,optional_envs) Configs.load_initial_env("./..env")