Example No. 1
 def run_main(fl):
     try:
         return main(np.asarray(Image.open(fl).convert('L'))/255, 
                     conf=Config(break_width=2.5, recognizer='hmm', 
                                 segmenter='stochastic', page_type='pecha', 
                                 line_break_method='line_cluster'), 
                     page_info={'flname':fl, 'volume': VOL})
     except:
         return []
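A minimal sketch of the image-loading step used in run_main above: open the page image, convert it to 8-bit grayscale, and scale the pixel values into the [0, 1] range that main() is given. Assumes numpy and Pillow are installed; load_page is an illustrative name, not part of the original code.

import numpy as np
from PIL import Image

def load_page(path):
    # 8-bit grayscale, then floats in [0, 1]
    gray = Image.open(path).convert('L')
    return np.asarray(gray, dtype=np.float64) / 255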
Example No. 2
    def viterbi_post_process(self, img_arr, results):
        '''Go through all results and attempt to correct invalid syllables'''
        final = [[] for i in range(len(results))]
        for i, line in enumerate(results):
            syllable = []
            for j, char in enumerate(line):
                if char[-1] in u'་། ' or not word_parts_set.intersection(char[-1]) or j == len(line)-1:
                    if syllable:
                        syl_str = ''.join(s[-1] for s in syllable)
                        
                        if is_non_std(syl_str) and syl_str not in syllables:
                            print syl_str, 'HAS PROBLEMS. TRYING TO FIX'
                            bx = combine_many_boxes([ch[0:4] for ch in syllable])
                            bx = list(bx)

                            arr = img_arr[bx[1]:bx[1]+bx[3], bx[0]:bx[0]+bx[2]]
                            arr = fadd_padding(arr, 3)

                            try:
                                temp_dir = tempfile.mkdtemp()
                                tmpimg = os.path.join(temp_dir, 'tmp.tif')
                                Image.fromarray(arr*255).convert('L').save(tmpimg)
                                pgrec = PageRecognizer(tmpimg, Config(line_break_method='line_cut', page_type='book', postprocess=False, viterbi_postprocessing=True, clear_hr=False, detect_o=False))
                                prob, hmm_res = pgrec.recognize_page()
                                os.remove(tmpimg)
                                os.removedirs(temp_dir)
                            except TypeError:
                                print 'HMM run exited with an error.'
                                prob = 0
                                hmm_res = ''
                            
                            logging.info(u'VPP Correction: %s\t%s' % (syl_str, hmm_res))
                            if prob == 0 and hmm_res == '':
                                print 'hit problem. using unmodified output'
                                for s in syllable:
                                    final[i].append(s)
                            else:
                                bx.append(prob)
                                bx.append(hmm_res)
                                final[i].append(bx)
                        else:
                            for s in syllable:
                                final[i].append(s)
                    final[i].append(char)
                    syllable = []
                else:
                    syllable.append(char)
            if syllable:
                for s in syllable:
                    final[i].append(s)
    
        return final
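A small sketch of the temp-file round trip used in viterbi_post_process above, with cleanup handled by a context manager instead of manual os.remove/os.removedirs calls. Assumes numpy and Pillow are installed; recognize_file is an illustrative stand-in for building a PageRecognizer and calling recognize_page.

import os
import tempfile

import numpy as np
from PIL import Image

def recognize_crop(arr, recognize_file):
    # Write the cropped syllable image to a temporary TIFF, recognize it,
    # and let the context manager remove the file and directory afterwards.
    with tempfile.TemporaryDirectory() as temp_dir:
        tmpimg = os.path.join(temp_dir, 'tmp.tif')
        Image.fromarray((arr * 255).astype(np.uint8)).convert('L').save(tmpimg)
        return recognize_file(tmpimg)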
Example No. 3
 def __init__(self):
     # if 'new.txt' exists, show it in a dialog
     if os.path.exists('new.txt'):
         with open('new.txt') as f:
             msg = f.read()
         new_dlg = WhatsNew(self, msg)
         if new_dlg.ShowModal() == wx.ID_CANCEL:
             os.remove('new.txt')
             
     
     configfile = self.get_configfile()
     self.config = Config(configfile)
     self.init_project()
Example No. 4
def run_main(fl, conf=None, text=False):
    '''Helper function to do recognition'''
    if not conf:
#         conf = Config(low_ink=False, segmenter='stochastic', recognizer='hmm', 
#               break_width=2.0, page_type='pecha', line_break_method='line_cluster', 
#               line_cluster_pos='center', postprocess=False, detect_o=False,
#               clear_hr = False)
# 
        conf = Config(segmenter='stochastic', recognizer='hmm', break_width=2.5,  
                      line_break_method='line_cut', postprocess=False,
                      low_ink=False, stop_line_cut=False, clear_hr=True, 
                      detect_o=False)

    return main(np.asarray(Image.open(fl).convert('L'))/255, conf=conf, 
                page_info={'flname':os.path.basename(fl), 'volume': VOL}, 
                text=text)
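A minimal sketch of the "fall back to a default configuration when none is supplied" pattern above, using a plain dict in place of the Config class; the keys shown are just the ones visible in this example, and resolve_conf is an illustrative name.

DEFAULT_CONF = {
    'segmenter': 'stochastic',
    'recognizer': 'hmm',
    'break_width': 2.5,
    'line_break_method': 'line_cut',
    'postprocess': False,
}

def resolve_conf(conf=None):
    # Copy the defaults so callers can safely mutate the returned dict.
    return dict(DEFAULT_CONF) if conf is None else conf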
Example No. 5
def viterbi_post_process(img_arr, results):
    '''Go through all results and attempt to correct invalid syllables'''
    final = [[] for i in range(len(results))]
    for i, line in enumerate(results):
        syllable = []
        for j, char in enumerate(line):
            if char[-1] in u'་། ' or not word_parts.intersection(char[-1]) or j == len(line)-1:
                if syllable:
                    syl_str = ''.join(s[-1] for s in syllable)
                    
                    if is_non_std(syl_str) and syl_str not in syllables:
                        print syl_str, 'HAS PROBLEMS. TRYING TO FIX'
                        bx = combine_many_boxes([ch[0:4] for ch in syllable])
                        bx = list(bx)
                        arr = img_arr[bx[1]:bx[1]+bx[3], bx[0]:bx[0]+bx[2]]
                        arr = fadd_padding(arr, 3)
                        try:
                            
                            prob, hmm_res = main(arr, Config(line_break_method='line_cut', page_type='book', postprocess=False, viterbi_postprocess=True, clear_hr=False), page_info={'flname':''})
                        except TypeError:
                            print 'HMM run exited with an error.'
                            prob = 0
                            hmm_res = ''
                        
#                         corrections[syl_str].append(hmm_res) 
                        logging.info(u'VPP Correction: %s\t%s' % (syl_str, hmm_res))
                        if prob == 0 and hmm_res == '':
                            print 'hit problem. using unmodified output'
                            for s in syllable:
                                final[i].append(s)
                        else:
                            bx.append(prob)
                            bx.append(hmm_res)
                            final[i].append(bx)
                    else:
                        for s in syllable:
                            final[i].append(s)
                final[i].append(char)
                syllable = []
            else:
                syllable.append(char)
        if syllable:
            for s in syllable:
                final[i].append(s)

    return final
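A sketch of the syllable-grouping step that both viterbi_post_process versions rely on, assuming each char is a (x, y, w, h, label) tuple and that the delimiters are the tsheg/shad/space characters checked above; group_syllables is an illustrative name.

DELIMS = set(u'་། ')

def group_syllables(line):
    # Collect consecutive non-delimiter characters into syllables.
    syllables, current = [], []
    for char in line:
        if char[-1] in DELIMS:
            if current:
                syllables.append(current)
                current = []
        else:
            current.append(char)
    if current:
        syllables.append(current)
    return syllables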
Example No. 6
def main():
    conf = Config()
    if not conf.is_set("downloads_path", "documents_path"):
        ## create and set useful paths
        downloads_path, documents_path = get_downloads_and_documents_path()
        conf.add(("downloads_path", downloads_path),
                 ("documents_path", documents_path))
        conf.set_config()
    else:
        ## restore useful paths
        downloads_path, documents_path = conf.get("downloads_path",
                                                  "documents_path")

    if check_download_and_documents_path(downloads_path, documents_path):
        root, dirs, files = next(os.walk('.'))
        for f in files:
            full_path = os.path.join(downloads_path, f)
            print(os.path.splitext(full_path))

    else:
        raise Exception("Download or Documents directory not found")
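A stdlib-only sketch of the same "compute the paths once, persist them, and restore them on later runs" pattern, using a JSON file in place of the Config class above; the file name and helper are illustrative.

import json
import os

def load_or_create_settings(path, compute_defaults):
    # Restore saved settings if they exist, otherwise compute and persist them.
    if os.path.exists(path):
        with open(path) as f:
            return json.load(f)
    settings = compute_defaults()
    with open(path, 'w') as f:
        json.dump(settings, f)
    return settings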
Example No. 7
def main():
    config = Config.loadConfig('config.ini')
    ensureArchiveDir(config.archiveDir)
    manager = DatabaseManager.createManager(config.archiveDir)
    manager.connectDB()
    for dirpath, dirnames, filenames in os.walk(config.inputDir):
        for filename in filenames:
            try:
                filePath = os.path.join(dirpath, filename)
                extension = os.path.splitext(filePath)[1].strip().strip('.')
                if len(extension) == 0:
                    log.warning(f'skip unknown file: {filePath}')
                elif file_utils.isImage(extension) or file_utils.isVideo(
                        extension):
                    processFile(manager, config.archiveDir, filePath)
                else:
                    log.warning(f'skip unsupported file: {filePath}')
            except Exception as e:
                log.failure(f'Unexpected error: {e}')
    manager.closeDB()
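A sketch of the walk-and-dispatch step above, with illustrative extension sets standing in for file_utils.isImage/isVideo; it only yields the files the loop above would hand to processFile.

import os

IMAGE_EXTS = {'jpg', 'jpeg', 'png', 'tif', 'tiff'}
VIDEO_EXTS = {'mp4', 'mov', 'avi', 'mkv'}

def iter_media_files(input_dir):
    for dirpath, _dirnames, filenames in os.walk(input_dir):
        for filename in filenames:
            ext = os.path.splitext(filename)[1].strip().strip('.').lower()
            if ext in IMAGE_EXTS or ext in VIDEO_EXTS:
                yield os.path.join(dirpath, filename)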
Example No. 8
def run_recognize(imagepath):
    global args
    command_args = args
    if command_args.conf:
        conf_dict = load_config(command_args.conf)
    else:
        conf_dict = default_config
        
    # Override any confs with command line versions
    for key in conf_dict:

        if not hasattr(command_args, key):
            continue
        val = getattr(command_args, key)
        if val:
            conf_dict[key] = val
            
    rec = PageRecognizer(imagepath, conf=Config(**conf_dict))
    text = args.format == 'text'
    return rec.recognize_page(text=text)
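A sketch of the "override configuration values with any matching command-line flags" step in run_recognize above; conf_dict is a plain dict and command_args any argparse.Namespace, and the helper name is illustrative.

def apply_cli_overrides(conf_dict, command_args):
    # Only override keys that exist as attributes and were actually set.
    for key in list(conf_dict):
        val = getattr(command_args, key, None)
        if val:
            conf_dict[key] = val
    return conf_dict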
Example No. 9
def main(page_array, conf=Config(viterbi_postprocess=False, line_break_method = None, page_type = None), retries=0,
         text=False, page_info={}):
    '''Main procedure for processing a page from start to finish

    Parameters:
    --------------------
    page_array: a 2-dimensional numpy array containing binary pixel data of
        the image

    page_info: dictionary, optional
        A dictionary containing metadata about the page to be recognized.
        Define strings for the keywords "flname" and "volume" if saving
        a serialized copy of the OCR results.

    retries: used internally when the system re-runs a failed attempt

    text: boolean flag. If True, return text rather than char-position data

    Returns:
    --------------
    text: str
        Recognized text for the entire page

    If text=False, returns character position and label data as a python dictionary
    '''
    
    print page_info.get('flname','')
    
    confpath = conf.path
    conf = conf.conf
    
    line_break_method = conf['line_break_method']
    page_type = conf['page_type']

    ### Set the line_break method automatically if it hasn't been
    ### specified beforehand
    if not line_break_method and not page_type:
        if page_array.shape[1] > 2*page_array.shape[0]:
            print 'setting page type as pecha'
            line_break_method = 'line_cluster'
            page_type = 'pecha'
        else: 
            print 'setting page type as book'
            line_break_method = 'line_cut'
            page_type = 'book' 
            
    conf['page_type'] = page_type
    conf['line_break_method'] = line_break_method
    detect_o = conf.get('detect_o', False)
    print 'clear hr', conf.get('clear_hr', False)

    results = []
    out = u''
    try:
        ### Get information about the pages
        shapes = PE2(page_array, cls, page_type=page_type, 
                     low_ink=conf['low_ink'], 
                     flpath=page_info.get('flname',''),
                     detect_o=detect_o, 
                     clear_hr =  conf.get('clear_hr', False))
        shapes.conf = conf
        
        ### Separate the lines on a page
        if page_type == 'pecha':
            k_groups = shapes.num_lines
        shapes.viterbi_post = conf['viterbi_postprocess']
        
        if line_break_method == 'line_cut':
            line_info = LineCut(shapes)
            if not line_info: # immediately skip to re-run with LineCluster
                sys.exit()
        elif line_break_method == 'line_cluster':
            line_info = LineCluster(shapes, k=k_groups)
        
        
        ### Perform segmentation of characters
        segmentation = Segmenter(line_info)

        ###Perform recognition
        if not conf['viterbi_postprocess']:
            if conf['recognizer'] == 'probout':
                results = recognize_chars_probout(segmentation)
            elif conf['recognizer'] == 'hmm':
                results = recognize_chars_hmm(segmentation, trans_p, start_p)
            elif conf['recognizer'] == 'kama':
                results = recognize_chars_probout(segmentation)
                results = recognize_chars_kama(results, segmentation)
            if conf['postprocess']:
                results = viterbi_post_process(segmentation.line_info.shapes.img_arr, results)
        else: # Should only be called from *within* a non-viterbi run...

            prob, results = hmm_recognize_bigram(segmentation)
            return prob, results
        
        
        ### Construct an output string
        output  = []
        for n, line in enumerate(results):
            for m,k in enumerate(line):
#                 if isinstance(k[-1], int):
#                     print n,m,k
#                     page_array[k[1]:k[1]+k[3], k[0]:k[0]+k[2]] = 0
#                     Image.fromarray(page_array*255).show()
                    
                output.append(k[-1])
            output.append(u'\n')

        out =  ''.join(output)
        print out
    
        if text:
            results = out
        
        return results
    except:
        ### Retry and assume the error was caused by use of the
        ### wrong line_break_method...
        import traceback;traceback.print_exc()
        if not results and not conf['viterbi_postprocess']:
            print 'WARNING', '*'*40
            print page_info['flname'], 'failed to return a result.'
            print 'WARNING', '*'*40
            print
            if line_break_method == 'line_cut' and retries < 1:
                print 'retrying with line_cluster instead of line_cut'
                try:
                    return main(page_array, conf=Config(path=confpath, line_break_method='line_cluster', page_type='pecha'), page_info=page_info, retries = 1, text=text)
                except:
                    logging.info('Exited after failure of second run.')
                    return []
        if not conf['viterbi_postprocess']: 
            if not results:
                logging.info('***** No OCR output for %s *****' % page_info['flname'])
            return results
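A minimal sketch of the page-type heuristic in main() above: wide pecha-format pages (width more than twice the height) are sent to line clustering, while everything else is treated as a book page and line cut. The function name is illustrative; page_array is any 2-D array with a .shape attribute.

def choose_line_break_method(page_array):
    height, width = page_array.shape[:2]
    if width > 2 * height:
        return 'line_cluster', 'pecha'
    return 'line_cut', 'book'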
Example No. 10
if __name__ == '__main__':
    def run_main(fl):
        try:
            return main(np.asarray(Image.open(fl).convert('L'))/255, 
                        conf=Config(break_width=2.5, recognizer='hmm', 
                                    segmenter='stochastic', page_type='pecha', 
                                    line_break_method='line_cluster'), 
                        page_info={'flname':fl, 'volume': VOL})
        except:
            return []
    import datetime
    start = datetime.datetime.now()
    print 'starting'
    outfile = codecs.open('/home/zr/latest-ocr-outfile.txt', 'w', 'utf-8')
    
    for fl in fls:
        
        #### line cut
#         ret = main((np.asarray(Image.open(fl).convert('L'))/255), 
#            conf=Config(break_width=2., recognizer='probout', 
#            segmenter='stochastic', line_break_method='line_cut', 
#            postprocess=False, stop_line_cut=False, low_ink=False, clear_hr=True), 
#                    page_info={'flname':fl, 'volume': VOL}, text=True)

        #### line cluster
        ret = main((np.asarray(Image.open(fl).convert('L'))/255), 
                   conf=Config(segmenter='stochastic', recognizer='hmm', 
                               break_width=2.0, page_type='pecha', 
                               line_break_method='line_cluster',
                               line_cluster_pos='center', postprocess=False,
                                detect_o=False, low_ink=False, clear_hr=True), 
                    page_info={'flname':fl, 'volume': VOL}, text=True)
        outfile.write(ret)
        outfile.write('\n\n')

    print datetime.datetime.now() - start, 'time taken'
 
Example No. 11
def run_recognize_remote(imagepath, conf_dict, text=False):
    rec = PageRecognizer(imagepath, conf=Config(**conf_dict))
    results = rec.recognize_page(text=text)
    return results
Example No. 12
    def recognize_page(self, text=False):
        try:
            self.get_page_elements()
            self.extract_lines()
        except:
            import traceback;traceback.print_exc()
            self.results = []
            return self.results
        
        self.generate_segmentation()
        
        conf = self.conf
        results = []
        out = u''
        try:
            if not conf['viterbi_postprocessing']:
                if conf['recognizer'] == 'probout':
                    results = recognize_chars_probout(self.segmentation)
                elif conf['recognizer'] == 'hmm':
                    results = recognize_chars_hmm(self.segmentation)
    
                if conf['postprocess']:
#                     print 'running viterbi post processing as next iter'
                    results = self.viterbi_post_process(self.page_array, results)
            else: # Should only be called from *within* a non-viterbi run...
               # print 'Debug: Running within viterbi post proc'
                prob, results = hmm_recognize_bigram(self.segmentation)
    
                return prob, results
                
            output  = []
            for n, line in enumerate(results):
                for m,k in enumerate(line):
                    if isinstance(k[-1], int):
                        print n,m,k
                        self.page_array[k[1]:k[1]+k[3], k[0]:k[0]+k[2]] = 0
                        Image.fromarray(self.page_array*255).show()
                        
                    output.append(k[-1])
    
                output.append(u'\n')
    
            out =  ''.join(output)
            print out
        
            if text:
                results = out
            
            self.results = results
            return results
        except:
            import traceback;traceback.print_exc()
            if not results and not conf['viterbi_postprocessing']:
                print 'WARNING', '*'*40
                print self.page_info['flname'], 'failed to return a result.'
                print 'WARNING', '*'*40
                print
                if self.line_break_method == 'line_cut' and self.retries < 1:
                    print 'retrying with line_cluster instead of line_cut'
                    try:
                        pr = PageRecognizer(self.imagefile, Config(path=self.confpath, line_break_method='line_cluster', page_type='pecha'), page_info=self.page_info, retries = 1, text=text)
                        return pr.recognize_page()
                    except:
                        logging.info('Exited after failure of second run.')
                        return []
            if not conf['viterbi_postprocessing']: 
                if not results:
                    logging.info('***** No OCR output for %s *****' % self.page_info['flname'])
                if text:
                    results = out
                self.results = results
                return results
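A sketch of the retry-on-failure fallback used in recognize_page above: if a line_cut run produces no results, try once more with line_cluster on a pecha-style layout. recognize is an illustrative stand-in for constructing a PageRecognizer and calling recognize_page; conf is a plain dict here.

def recognize_with_fallback(recognize, imagefile, conf):
    results = recognize(imagefile, conf)
    if not results and conf.get('line_break_method') == 'line_cut':
        retry_conf = dict(conf, line_break_method='line_cluster',
                          page_type='pecha')
        results = recognize(imagefile, retry_conf)
    return results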
Example No. 13
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument("conf", help="JSON configuration file", type=str)
    args = parser.parse_args()

    # Load configuration file
    try:
        with open(args.conf, "r") as f:
            config = Config(**json.load(f))
    except Exception as e:
        print(e)
        return

    # if save is enabled, create the output folder if it does not exist
    if config.save:
        if not os.path.isdir(config.output):
            os.mkdir(config.output)

    # read the input image
    img = cv2.imread(config.input)
    # check if is a valid image
    if img is None:
        print(ERR_INPUT_NOT_EXISTS)
        return

    # convert the image to gray-scale
    gray_img = img.copy()
    if len(img.shape) == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # get edges map
    gray_img_blur = cv2.GaussianBlur(gray_img, (5, 5), 0)
    #thresh, _ = cv2.threshold(gray_img_blur, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    thresh, _ = cv2.threshold(gray_img, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    edges = cv2.Canny(gray_img_blur, thresh / 2, thresh, apertureSize=3)

    skew = 0.
    if config.deskew:
        # apply Stroke Width Transform
        swt = swt_transform(
            gray_img,
            edges,
            psi=np.pi / 2,
            dark_on_light=config.dark_on_light,
            _save=config.save,
            _save_path=config.output,
        )

        # apply segmentation on swt and get layers
        _, layers = swt_segmentation(swt, gray_img)

        # filter layers into letters
        letters = swt_extract_letters(
            gray_img,
            layers,
            min_width=config.letters.min_width,
            min_height=config.letters.min_height,
            max_width=config.letters.max_width,
            max_height=config.letters.max_height,
            width_height_ratio=config.letters.width_height_ratio,
            height_width_ratio=config.letters.height_width_ratio,
            min_diag_mswt_ratio=config.letters.min_diag_mswt_ratio,
            max_diag_mswt_ratio=config.letters.max_diag_mswt_ratio,
        )

        letters_image = create_letters_edge_image(gray_img, edges, letters)

        if config.save:
            cv2.imwrite("{}/pre_swt.jpg".format(config.output), swt)
            cv2.imwrite("{}/pre_layers_conn.jpg".format(config.output),
                        draw_strokes_connections(img, layers))
            cv2.imwrite("{}/pre_layers.jpg".format(config.output),
                        draw_strokes_contours(img, layers))
            cv2.imwrite("{}/pre_letters.jpg".format(config.output),
                        draw_strokes_contours(img, letters))
            cv2.imwrite("{}/pre_letters_conmn.jpg".format(config.output),
                        draw_strokes_connections(img, letters))
            cv2.imwrite("{}/deskew_points.jpg".format(config.output),
                        letters_image)

        img, skew = skew_correction(img,
                                    letters_image,
                                    threshold=None,
                                    _save=config.save,
                                    _save_path=config.output)

        gray_img = img.copy()
        if len(img.shape) == 3:
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        gray_img_blur = cv2.GaussianBlur(gray_img, (5, 5), 0)
        #thresh, _ = cv2.threshold(gray_img_blur, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        thresh, _ = cv2.threshold(gray_img, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        edges = cv2.Canny(gray_img_blur, thresh / 2, thresh, apertureSize=3)

    # apply Stroke Width Transform
    swt = swt_transform(
        gray_img,
        edges,
        psi=np.pi / 2,
        dark_on_light=config.dark_on_light,
        skip_edges=config.swt_skip_edges,
        _save=config.save,
        _save_path=config.output,
    )
    #swt[ gray_img>np.mean(gray_img) ] = np.Infinity

    # apply segmentation on swt and get layers
    _, layers = swt_segmentation(
        swt,
        gray_img,
        skip_edges=config.swt_skip_edges,
    )

    # filter layers into letters
    letters = swt_extract_letters(
        gray_img,
        layers,
        min_width=config.letters.min_width,
        min_height=config.letters.min_height,
        max_width=config.letters.max_width,
        max_height=config.letters.max_height,
        width_height_ratio=config.letters.width_height_ratio,
        height_width_ratio=config.letters.height_width_ratio,
        min_diag_mswt_ratio=config.letters.min_diag_mswt_ratio,
        max_diag_mswt_ratio=config.letters.max_diag_mswt_ratio,
    )

    # merge letters into words
    words_h, _ = swt_extract_words(letters,
                                   thresh_pairs_y=config.words.thresh_pairs_y,
                                   thresh_mswt=config.words.thresh_mswt,
                                   thresh_height=config.words.thresh_height,
                                   width_scale=config.words.width_scale,
                                   _save=config.save,
                                   _save_path=config.output)

    words_h_strokes = get_strokes_from_words(words_h, swt, gray_img)

    if config.save and config.gt:

        aaa = draw_strokes_centers(
            draw_strokes_contours(
                draw_strokes_connections(img, words_h_strokes),
                words_h_strokes), words_h_strokes)
        matrix = cv2.getRotationMatrix2D((img.shape[1] / 2, img.shape[0] / 2),
                                         -np.rad2deg(skew), 1)
        dst = cv2.warpAffine(aaa, matrix, (img.shape[1], img.shape[0]),
                             flags=cv2.INTER_NEAREST)
        cv2.imwrite("{}/words_all_original.jpg".format(config.output), dst)

        _txt = ""
        _words = words_h_strokes
        _label = 0
        for _w in _words:

            cx = float(img.shape[1]) / 2
            cy = float(img.shape[0]) / 2

            wcx = float(_w.center[1])
            wcy = float(_w.center[0])

            _corr_x = cx + (wcx - cx) * np.cos(skew) - (
                wcy - cy) * np.sin(skew) - wcx
            _corr_y = cy + (wcx - cx) * np.sin(skew) + (
                wcy - cy) * np.cos(skew) - wcy

            # print("word center x",wcx)
            # print("word center y",wcy)

            # print("displacement x",_corr_x)
            # print("displacement y",_corr_y)

            # print("minx:{} miny:{} maxx:{} maxy:{}".format(_w.min_x,_w.min_y,_w.max_x,_w.max_y))

            # add o.nico to regards!!!!!!!!

            if skew < -np.pi / 4:

                _txt += "{} {} {} {} {} {} {} \n".format(
                    _label, 0, int((wcx + _corr_x) - abs(((_w.min_y - wcy)))),
                    int((wcy + _corr_y) - abs(((_w.min_x - wcx)))),
                    int(_w.height), int(_w.width), np.pi / 2 + skew)
            else:
                _txt += "{} {} {} {} {} {} {} \n".format(
                    _label, 0,
                    int(_w.min_x + _corr_x), int(_w.min_y + _corr_y),
                    int(_w.width), int(_w.height), skew)
            _label += 1

        with open('{}/gt.gt'.format(config.output), 'w') as file:
            file.write(_txt)

    #####

    if config.save:

        # save swt data
        cv2.imwrite("{}/swt.jpg".format(config.output), swt)

        # save layers data: box, connection
        cv2.imwrite("{}/layers.jpg".format(config.output),
                    draw_strokes_contours(img, layers))
        cv2.imwrite("{}/layers_connection.jpg".format(config.output),
                    draw_strokes_connections(img, layers))
        cv2.imwrite(
            "{}/layers_all.jpg".format(config.output),
            draw_strokes_contours(draw_strokes_connections(img, layers),
                                  layers))

        # save letters data: box, connection, center
        cv2.imwrite("{}/letters.jpg".format(config.output),
                    draw_strokes_contours(img, letters))
        cv2.imwrite("{}/letters_connection.jpg".format(config.output),
                    draw_strokes_connections(img, letters))
        cv2.imwrite("{}/letters_center.jpg".format(config.output),
                    draw_strokes_centers(img, letters))
        cv2.imwrite(
            "{}/letters_all.jpg".format(config.output),
            draw_strokes_centers(
                draw_strokes_contours(draw_strokes_connections(img, letters),
                                      letters), letters))

        # save words data: box, connection, center
        cv2.imwrite("{}/words_box.jpg".format(config.output),
                    draw_strokes_contours(img, words_h_strokes))
        cv2.imwrite("{}/words_connection.jpg".format(config.output),
                    draw_strokes_connections(img, words_h_strokes))
        cv2.imwrite("{}/words_center.jpg".format(config.output),
                    draw_strokes_centers(img, words_h_strokes))
        cv2.imwrite(
            "{}/words_all.jpg".format(config.output),
            draw_strokes_centers(
                draw_strokes_contours(
                    draw_strokes_connections(img, words_h_strokes),
                    words_h_strokes), words_h_strokes))
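A small sketch of the edge-map step used twice above: blur the grayscale image, pick a global threshold with Otsu's method, and feed it to Canny as the high threshold with half of it as the low threshold. Assumes OpenCV (cv2) is installed; edge_map is an illustrative name.

import cv2

def edge_map(gray_img):
    blur = cv2.GaussianBlur(gray_img, (5, 5), 0)
    thresh, _ = cv2.threshold(gray_img, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return cv2.Canny(blur, thresh / 2, thresh, apertureSize=3)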
Example No. 14
from config_manager import Config


## Init pre-defined environments:
required_envs = ["required_test_env"]
optional_envs = {
    "optional_env_test1": "1",
    "optional_env_test2": "2"
}

## Load the env file to parse configurations.
Configs = Config(required_envs, optional_envs)
Configs.load_initial_env("./..env")
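A stdlib-only sketch of the required/optional environment-variable pattern shown above; read_envs is an illustrative helper, not the real config_manager.Config API.

import os

def read_envs(required, optional):
    settings = {}
    for name in required:
        if name not in os.environ:
            raise RuntimeError('missing required environment variable: %s' % name)
        settings[name] = os.environ[name]
    for name, default in optional.items():
        settings[name] = os.environ.get(name, default)
    return settings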