def _process_words_in_line(self, line, maxlevel, result_it):
    """Add one Word element to ``line`` per word the result iterator yields.

    Each word gets an id derived from ``line.id``, coordinates from the
    iterator's word bounding box, an optional TextStyle built from the
    iterator's font attributes, and a TextEquiv carrying the word text and
    its confidence divided by 100. Unless ``maxlevel`` is ``'word'``, glyphs
    are added below each word via ``_process_glyphs_in_word``.

    Iteration stops at the last word of the text line, on an empty or
    missing iterator, or after ``MAX_ELEMENTS`` words.
    """
    # iterate until IsAtFinalElement(RIL.TEXTLINE, RIL.WORD)
    for word_no in range(MAX_ELEMENTS):
        if not result_it:
            log.error("No iterator at '%s'", line.id)
            break
        if result_it.Empty(RIL.WORD):
            log.debug("No word here")
            break

        word_id = '%s_word%04d' % (line.id, word_no)
        log.debug("Recognizing text in word '%s'", word_id)
        bbox = result_it.BoundingBox(RIL.WORD)
        coords = CoordsType(points_from_x0y0x1y1(bbox))
        word = WordType(id=word_id, Coords=coords)
        line.add_Word(word)

        # todo: determine if font attributes available for word level
        #       will work with LSTM models
        font = result_it.WordFontAttributes()
        if font:
            # missing attributes are passed on as None
            style = TextStyleType(
                fontSize=font.get('pointsize'),
                fontFamily=font.get('font_name'),
                bold=font.get('bold'),
                italic=font.get('italic'),
                underlined=font.get('underlined'),
                monospace=font.get('monospace'),
                serif=font.get('serif'))
            word.set_TextStyle(style)  # (or somewhere in custom attribute?)

        # add word annotation unconditionally (i.e. even for glyph level):
        word_text = result_it.GetUTF8Text(RIL.WORD)
        word_conf = result_it.Confidence(RIL.WORD) / 100
        word.add_TextEquiv(TextEquivType(Unicode=word_text, conf=word_conf))

        if maxlevel != 'word':
            self._process_glyphs_in_word(word, result_it)

        if result_it.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
            break
        result_it.Next(RIL.WORD)
def process(self):
    """
    Segment with kraken

    For every input file: download it, parse it as PAGE, resolve the page
    image, run kraken's ``segment`` with the processor parameters
    (optionally followed by ``detect_scripts``), and store each resulting
    box as a TextLine inside a single new TextRegion. The updated PAGE
    document is then added to the output file group of the workspace.
    """
    log = getLogger('processor.KrakenSegment')
    for n, input_file in enumerate(self.input_files):
        log.info("INPUT FILE %i / %s", n, input_file)
        downloaded_file = self.workspace.download_file(input_file)
        log.info("downloaded_file %s", downloaded_file)
        pcgts = page_from_file(downloaded_file)
        page = pcgts.get_Page()
        # TODO binarized variant from get_AlternativeImage()
        image_url = page.imageFilename
        log.info("pcgts %s", pcgts)
        im = self.workspace.resolve_image_as_pil(image_url)

        log.info('Segmenting')
        log.info('Params %s', self.parameter)
        res = segment(
            im,
            self.parameter['text_direction'],
            self.parameter['scale'],
            self.parameter['maxcolseps'],
            self.parameter['black_colseps'])
        if self.parameter['script_detect']:
            res = detect_scripts(im, res)

        # All detected lines are collected in one placeholder region.
        # NOTE(review): the region is created without id or Coords --
        # confirm downstream consumers accept that.
        region = TextRegionType()
        pcgts.get_Page().add_TextRegion(region)
        for lineno, box in enumerate(res['boxes']):
            line_coords = CoordsType(points=points_from_x0y0x1y1(box))
            region.add_TextLine(
                TextLineType(id=concat_padded("line", lineno),
                             Coords=line_coords))

        ID = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(
            self.output_file_grp,
            pageId=input_file.pageId,
            ID=ID,
            mimetype=MIMETYPE_PAGE,
            local_filename="%s/%s.xml" % (self.output_file_grp, ID),
            content=to_xml(pcgts).encode('utf-8'))
def _process_glyphs_in_word(self, word, result_it):
    """Add one Glyph element to ``word`` per symbol the result iterator yields.

    Each glyph gets an id derived from ``word.id`` and coordinates from the
    iterator's symbol bounding box. The iterator's choices are attached as
    indexed TextEquiv alternatives until one falls more than
    ``CHOICE_THRESHOLD_CONF`` below the symbol's own confidence or the
    choice index exceeds ``CHOICE_THRESHOLD_NUM``.

    Iteration stops at the last symbol of the word, on an empty or missing
    iterator, or after ``MAX_ELEMENTS`` glyphs.
    """
    # iterate until IsAtFinalElement(RIL.WORD, RIL.SYMBOL)
    for glyph_no in range(MAX_ELEMENTS):
        if not result_it:
            log.error("No iterator at '%s'", word.id)
            break
        if result_it.Empty(RIL.SYMBOL):
            log.debug("No glyph here")
            break

        glyph_id = '%s_glyph%04d' % (word.id, glyph_no)
        log.debug("Recognizing text in glyph '%s'", glyph_id)
        # confidence of the symbol itself (equals first choice?)
        glyph_conf = result_it.Confidence(RIL.SYMBOL) / 100
        bbox = result_it.BoundingBox(RIL.SYMBOL)
        coords = CoordsType(points_from_x0y0x1y1(bbox))
        glyph = GlyphType(id=glyph_id, Coords=coords)
        word.add_Glyph(glyph)

        for choice_no, choice in enumerate(result_it.GetChoiceIterator()):
            alternative_text = choice.GetUTF8Text()
            alternative_conf = choice.Confidence() / 100
            if (glyph_conf - alternative_conf > CHOICE_THRESHOLD_CONF
                    or choice_no > CHOICE_THRESHOLD_NUM):
                # remaining alternatives are too unlikely or too many
                break
            # todo: consider SymbolIsSuperscript (TextStyle),
            #       SymbolIsDropcap (RelationType) etc
            alternative = TextEquivType(index=choice_no,
                                        Unicode=alternative_text,
                                        conf=alternative_conf)
            glyph.add_TextEquiv(alternative)

        if result_it.IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
            break
        result_it.Next(RIL.SYMBOL)
def test_points_from_x0y0x1y1(self):
    """A box [x0, y0, x1, y1] becomes the points string 'x0,y0 x1,y0 x1,y1 x0,y1'."""
    box = [100, 100, 200, 200]
    expected = '100,100 200,100 200,200 100,200'
    self.assertEqual(points_from_x0y0x1y1(box), expected)
def process_lines(self, textlines, predfiles, fgrp, regionid):
    """Annotate text lines with results read from prediction JSON files.

    For every line in ``textlines``, each entry of ``predfiles`` equal to
    ``<fgrp>-<regionid>-<line.id>`` is loaded from
    ``<self.root>/<entry>.json``. The first prediction's ``sentence``
    replaces the line's first TextEquiv. When ``self.maxlevel`` is
    ``'word'`` or ``'glyph'``, the line's words are rebuilt from the
    per-character ``positions`` (split at spaces); for ``'glyph'`` every
    word additionally gets one glyph per character.

    Word confidence is the mean of the lowest and highest character
    probability in the word; line confidence is the mean of the lowest and
    highest word confidence. Confidences are stored as strings.

    Args:
        textlines: line objects providing ``id``, ``get_Coords()``,
            ``replace_TextEquiv_at()`` and ``add_Word()``.
        predfiles: names of available prediction files, without the
            ``.json`` suffix.
        fgrp: file group part of the prediction file name.
        regionid: region id part of the prediction file name.
    """
    for line in textlines:
        wanted = '-'.join([fgrp, regionid, line.id])
        for file in predfiles:
            if file != wanted:
                continue
            self.log.info("Processing text in line '%s'", line.id)
            filepath = self.root + '/' + file + '.json'
            # JSON is UTF-8; do not depend on the locale's default encoding.
            with open(filepath, encoding='utf-8') as f:
                data = json.load(f)
            prediction = data['predictions'][0]
            linepred = prediction['sentence']

            # Split the character stream into words at spaces, keeping the
            # per-character probabilities and positions of every word.
            words = []
            line_conf = []
            line_pos = []
            w = ''
            word_conf = []
            word_pos = []
            for d in prediction['positions']:
                best = d['chars'][0]
                char = best['char']
                if char != ' ':
                    w += char
                    word_conf.append(best['probability'])
                    word_pos.append((d['globalStart'], d['globalEnd']))
                elif w:
                    # Empty tokens (leading or repeated spaces) are dropped:
                    # they have no characters to derive a confidence or a
                    # box from.
                    words.append(w)
                    line_conf.append(word_conf)
                    line_pos.append(word_pos)
                    w = ''
                    word_conf = []
                    word_pos = []
            if w:
                words.append(w)
                line_conf.append(word_conf)
                line_pos.append(word_pos)

            if not words:
                # No characters at all: no confidence can be computed.
                self.log.warning(
                    "No characters predicted for line '%s'", line.id)
                continue

            wconfs = [(min(conf) + max(conf)) / 2 for conf in line_conf]
            lineconf = (min(wconfs) + max(wconfs)) / 2
            line.replace_TextEquiv_at(
                0, TextEquivType(Unicode=linepred, conf=str(lineconf)))

            # Was `== 'word' or 'glyph'`, which is always truthy and rebuilt
            # the words regardless of the requested level.
            if self.maxlevel not in ('word', 'glyph'):
                continue

            box = bounding_box(line.get_Coords().points)
            line.Word = []
            for w_no, w in enumerate(words):
                char_confs = line_conf[w_no]
                char_pos = line_pos[w_no]
                # Horizontal extent: start of first to end of last character.
                # NOTE(review): the right edge is offset from box[2] (as in
                # the original code), not from box[0] -- confirm what
                # globalStart/globalEnd are relative to.
                word_bbox = [
                    box[0] + char_pos[0][0], box[1],
                    box[2] + char_pos[-1][-1], box[3]
                ]
                word_id = '%s_word%04d' % (line.id, w_no)
                word = WordType(
                    id=word_id,
                    Coords=CoordsType(points_from_x0y0x1y1(word_bbox)))
                line.add_Word(word)
                word.add_TextEquiv(
                    TextEquivType(Unicode=w, conf=str(wconfs[w_no])))

                if self.maxlevel != 'glyph':
                    continue
                for glyph_no, g in enumerate(w):
                    start, end = char_pos[glyph_no][0], char_pos[glyph_no][-1]
                    glyph_bbox = [
                        box[0] + start, box[1],
                        box[2] + end, box[3]
                    ]
                    glyph_id = '%s_glyph%04d' % (word.id, glyph_no)
                    glyph = GlyphType(
                        id=glyph_id,
                        Coords=CoordsType(points_from_x0y0x1y1(glyph_bbox)))
                    word.add_Glyph(glyph)
                    glyph.add_TextEquiv(
                        TextEquivType(Unicode=g,
                                      conf=str(char_confs[glyph_no])))