def test_load_pyrnn_no_seqrecognizer(self):
    """
    Feeds load_any a pickle that does not contain a SeqRecognizer.
    """
    # serialize an arbitrary unicode string in place of a recognizer object
    payload = u'Iámnõtãrécðçnízer'
    pickle.dump(payload, self.temp)
    self.temp.close()
    pickle_path = self.temp.name
    models.load_any(pickle_path)
def test_load_pyrnn_no_seqrecognizer(self):
    """
    A pickle that holds no SeqRecognizer must make load_any raise
    KrakenInvalidModelException.
    """
    # serialize an arbitrary unicode string in place of a recognizer object
    payload = u'Iámnõtãrécðçnízer'
    pickle.dump(payload, self.temp)
    self.temp.close()
    with raises(KrakenInvalidModelException):
        models.load_any(self.temp.name)
def test_load_clstm(self):
    """
    Loading a valid clstm file has to yield a TorchSeqRecognizer.
    """
    model_path = os.path.join(resources, 'toy.clstm')
    # the path is handed over as UTF-8 encoded bytes
    rnn = models.load_any(model_path.encode('utf-8'))
    self.assertIsInstance(rnn, models.TorchSeqRecognizer)
def ocr(ctx, model, pad, reorder, base_dir, no_segmentation, text_direction, threads):
    """
    Recognizes text in line images.

    Locates and loads every model named in `model`, stores the text direction
    in the click context and returns a partial of `recognizer` bound to the
    loaded models.
    """
    from kraken.lib import models

    if ctx.meta['input_format_type'] != 'image' and no_segmentation:
        raise click.BadParameter('no_segmentation mode is incompatible with page/alto inputs')

    # an explicit base direction replaces the plain reorder flag
    if reorder and base_dir != 'auto':
        reorder = base_dir

    nm = {}  # type: Dict[str, models.TorchSeqRecognizer]
    ign_tags = model.pop('ignore')
    for k, v in model.items():
        # lookup order: path as given, then ~/.kraken, then LEGACY_MODEL_DIR
        candidates = (v,
                      os.path.join(click.get_app_dir(APP_NAME), v),
                      os.path.join(LEGACY_MODEL_DIR, v))
        location = next((path for path in candidates if os.path.isfile(path)), None)
        if not location:
            raise click.BadParameter(f'No model for {k} found')
        message(f'Loading ANN {k}\t', nl=False)
        try:
            nm[k] = models.load_any(location, device=ctx.meta['device'])
        except Exception:
            if ctx.meta['raise_failed']:
                raise
            message('\u2717', fg='red')
            ctx.exit(1)
        message('\u2713', fg='green')

    if 'default' in nm:
        from collections import defaultdict

        # unknown keys fall back to the default model
        nn = defaultdict(lambda: nm['default'])  # type: Dict[str, models.TorchSeqRecognizer]
        nn.update(nm)
        nm = nn

    # thread count is global so setting it once is sufficient
    nm[k].nn.set_num_threads(threads)

    # set output mode
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=nm,
                   pad=pad,
                   no_segmentation=no_segmentation,
                   bidi_reordering=reorder,
                   tags_ignore=ign_tags)
def test_rpred_outbounds(self):
    """
    Runs rpred on a line box that lies far outside the image bounds.
    """
    nn = load_any(os.path.join(resources, 'toy.clstm'))
    pred = rpred(nn,
                 self.im,
                 {'boxes': [[-1, -1, 10000, 10000]],
                  'text_direction': 'horizontal'},
                 True)
    # rpred is consumed lazily; pulling the first record triggers the work
    next(pred)
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font, font_style, prefill, output, images):
    """
    Builds a transcription interface from page images and writes it to
    `output`.

    Each image is binarized if it is not bitonal, segmented with
    `pageseg.segment` and added as a page. If `prefill` is given it is loaded
    as a recognition model and every page is added together with the
    recognition records produced by `rpred.rpred`.

    Progress is reported either as timestamped messages (when
    ctx.meta['verbose'] > 0) or through `spin` plus a green check mark.

    Args:
        ctx: click context; only ctx.meta['verbose'] is read.
        text_direction, scale, maxcolseps, black_colseps: passed through to
            `pageseg.segment`.
        font, font_style: passed to `transcribe.TranscriptionInterface`.
        prefill: model location or a falsy value to skip recognition.
        output: object handed to `ti.write`; its `name` is logged.
        images: iterable of open image files; each one is closed after use.
    """
    st_time = time.time()
    ti = transcribe.TranscriptionInterface(font, font_style)
    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        # from here on `prefill` is the loaded model, not its location
        prefill = models.load_any(prefill.encode('utf-8'))
        if not ctx.meta['verbose']:
            click.secho(u'\b\u2713', fg='green', nl=False)
            # presumably re-shows the cursor hidden by spin() -- TODO confirm
            click.echo('\033[?25h\n', nl=False)
    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            # collect all records of the page before adding it
            for pred in it:
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                click.secho(u'\b\u2713', fg='green', nl=False)
                click.echo('\033[?25h\n', nl=False)
            ti.add_page(im, res, records=preds)
        else:
            ti.add_page(im, res)
        fp.close()
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
def publish(ctx, metadata, access_token, model):
    """
    Publishes a model on the zenodo model repository.

    With a metadata file the record is parsed and validated once. Without
    one, the fields are prompted for interactively and fields failing schema
    validation are prompted for again until the record validates.
    """
    import json
    import pkg_resources

    from functools import partial
    from jsonschema import validate
    from jsonschema.exceptions import ValidationError

    from kraken import repo
    from kraken.lib import models

    with pkg_resources.resource_stream(__name__, 'metadata.schema.json') as fp:
        schema = json.load(fp)

    nn = models.load_any(model)

    if metadata:
        metadata = json.load(metadata)
        validate(metadata, schema)
    else:
        author = click.prompt('author')
        affiliation = click.prompt('affiliation')
        summary = click.prompt('summary')
        description = click.edit('Write long form description (training data, transcription standards) of the model here')

        # take last accuracy measurement in model metadata
        accuracy_default = None
        if 'accuracy' in nn.nn.user_metadata and nn.nn.user_metadata['accuracy']:
            accuracy_default = nn.nn.user_metadata['accuracy'][-1][1] * 100
        accuracy = click.prompt('accuracy on test set', type=float, default=accuracy_default)

        script_choice = click.Choice(sorted(schema['properties']['script']['items']['enum']))
        script = [click.prompt('script', type=script_choice, show_choices=True)]
        license_choice = click.Choice(sorted(schema['properties']['license']['enum']))
        license_id = click.prompt('license', type=license_choice, show_choices=True)

        metadata = {
            'authors': [{'name': author, 'affiliation': affiliation}],
            'summary': summary,
            'description': description,
            'accuracy': accuracy,
            'license': license_id,
            'script': script,
            'name': os.path.basename(model),
            # placeholder, replaced by the model's real alphabet below
            'graphemes': ['a'],
        }

        # re-prompt for the offending field until the record validates
        while True:
            try:
                validate(metadata, schema)
            except ValidationError as e:
                message(e.message)
                field = e.path[-1]
                field_type = float if e.schema['type'] == 'number' else str
                metadata[field] = click.prompt(field, type=field_type)
            else:
                break

    metadata['graphemes'] = list(''.join(nn.codec.c2l.keys()))
    oid = repo.publish_model(model, metadata, access_token, partial(message, '.', nl=False))
    print('\nmodel PID: {}'.format(oid))
def ocr(ctx, model, pad, reorder, serialization, text_direction, lines, conv):
    """
    Recognizes text in line images.

    The model is located and loaded here once to spare each worker the
    overhead. It is searched for under the path as given, then in the
    application directory (~/.kraken), then in LEGACY_MODEL_DIR.

    Args:
        ctx: click context; 'mode' and 'text_direction' are written to
            ctx.meta.
        model: file name or path of the recognition model.
        pad, reorder, lines: forwarded to `recognizer` (as `pad`,
            `bidi_reordering` and `lines`).
        serialization: stored as ctx.meta['mode'].
        text_direction: stored as ctx.meta['text_direction'].
        conv: if True, a previously converted copy of the model in the
            application directory is preferred, and a loaded model of kind
            'pyrnn' is converted and written there.

    Returns:
        functools.partial of `recognizer` bound to the loaded model.

    Raises:
        click.BadParameter: if no candidate location is an existing file.
        Any exception raised by `models.load_any` is re-raised after the
        failure mark has been printed.
    """
    app_dir = click.get_app_dir(APP_NAME)
    search = [model,
              os.path.join(app_dir, model),
              os.path.join(LEGACY_MODEL_DIR, model)]
    # if automatic conversion is enabled we look for a converted model in
    # ~/.kraken first
    if conv is True:
        stem = os.path.basename(os.path.splitext(model)[0])
        # Converted models are written with a `.pronn` suffix below, so that
        # is the suffix to look for. `.hdf5` stays in the list as a fallback
        # because it is what this lookup searched for previously.
        search[0:0] = [os.path.join(app_dir, stem + '.pronn'),
                       os.path.join(app_dir, stem + '.hdf5')]

    location = next((loc for loc in search if os.path.isfile(loc)), None)
    if not location:
        raise click.BadParameter('No model found')

    click.echo('Loading RNN\t', nl=False)
    try:
        rnn = models.load_any(location)
    except Exception:
        click.secho(u'\u2717', fg='red')
        # propagate the original error so the cause stays visible
        raise
    click.secho(u'\u2713', fg='green')

    # convert input model to protobuf
    if conv and rnn.kind == 'pyrnn':
        name, _ = os.path.splitext(os.path.basename(model))
        op = os.path.join(app_dir, name + '.pronn')
        try:
            os.makedirs(app_dir)
        except OSError:
            # an already existing directory is fine, anything else is not
            if not os.path.isdir(app_dir):
                raise
        models.pyrnn_to_pronn(rnn, op)

    # set output mode
    ctx.meta['mode'] = serialization
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=rnn,
                   pad=pad,
                   bidi_reordering=reorder,
                   lines=lines)
def test_rpred_outbounds(self):
    """
    Exercises rpred with line coordinates far outside of the image.
    """
    model_path = os.path.join(resources, 'toy.clstm')
    nn = load_any(model_path)
    pred = rpred(nn,
                 self.im,
                 {'boxes': [[-1, -1, 10000, 10000]], 'text_direction': 'horizontal'},
                 True)
    # pull the first record to actually run the recognizer
    next(pred)
def transcription(ctx, font, font_style, prefill, output, images):
    """
    Builds a transcription interface from page images and writes it to
    `output`.

    Each image is binarized if it is not bitonal and segmented with
    `pageseg.segment`. If `prefill` is given it is loaded as a recognition
    model and pages are added with the records produced by `rpred.rpred`;
    otherwise pages are added with their segmentation only.

    Progress is reported either as timestamped messages (when
    ctx.meta['verbose'] > 0) or through `spin` plus a green check mark.

    Args:
        ctx: click context; only ctx.meta['verbose'] is read.
        font, font_style: passed to `transcrib.TranscriptionInterface`.
        prefill: model location or a falsy value to skip recognition.
        output: object handed to `ti.write`; its `name` is logged.
        images: iterable of open image files.
    """
    st_time = time.time()
    ti = transcrib.TranscriptionInterface(font, font_style)
    if prefill:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Loading model {}'.format(time.time() - st_time, prefill))
        else:
            spin('Loading RNN')
        # from here on `prefill` is the loaded model, not its location
        prefill = models.load_any(prefill)
        if not ctx.meta['verbose']:
            click.secho(u'\b\u2713', fg='green', nl=False)
            # presumably re-shows the cursor hidden by spin() -- TODO confirm
            click.echo('\033[?25h\n', nl=False)
    for fp in images:
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Reading {}'.format(time.time() - st_time, fp.name))
        else:
            spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            if ctx.meta['verbose'] > 0:
                click.echo(u'[{:2.4f}] Binarizing page'.format(time.time() - st_time))
            im = binarization.nlbin(im)
        if ctx.meta['verbose'] > 0:
            click.echo(u'[{:2.4f}] Segmenting page'.format(time.time() - st_time))
        res = pageseg.segment(im)
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            # collect all records of the page before adding it
            for pred in it:
                if ctx.meta['verbose'] > 0:
                    click.echo(u'[{:2.4f}] {}'.format(time.time() - st_time, pred.prediction))
                else:
                    spin('Recognizing')
                preds.append(pred)
            if ctx.meta['verbose'] > 0:
                click.echo(u'Execution time: {}s'.format(time.time() - st_time))
            else:
                click.secho(u'\b\u2713', fg='green', nl=False)
                click.echo('\033[?25h\n', nl=False)
            # NOTE(review): the segmentation is not passed along with the
            # records here -- confirm add_page does not need it in this case
            ti.add_page(im, records=preds)
        else:
            ti.add_page(im, res)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
    if ctx.meta['verbose'] > 0:
        click.echo(u'[{:2.4f}] Writing transcription to {}'.format(time.time() - st_time, output.name))
    else:
        spin('Writing output')
    ti.write(output)
    if not ctx.meta['verbose']:
        click.secho(u'\b\u2713', fg='green', nl=False)
        click.echo('\033[?25h\n', nl=False)
def ocr(ctx, model, pad, reorder, no_segmentation, serializer, text_direction, lines, threads):
    """
    Recognizes text in line images.

    Every model named in `model` is searched for under the path as given,
    then in the application directory (~/.kraken), then in LEGACY_MODEL_DIR,
    and loaded onto ctx.meta['device'].

    Args:
        ctx: click context; ctx.meta['device'] is read, 'mode' and
            'text_direction' are written.
        model (dict): maps an identifier to a model file name. It must
            contain an 'ignore' entry, which is removed from the dict and
            forwarded as `script_ignore`. A 'default' entry, if present,
            becomes the fallback for unknown identifiers.
        pad, no_segmentation, lines: forwarded to `recognizer`.
        reorder: forwarded to `recognizer` as `bidi_reordering`.
        serializer: stored as ctx.meta['mode'].
        text_direction: stored as ctx.meta['text_direction'].
        threads: thread count set on the network.

    Returns:
        functools.partial of `recognizer` bound to the loaded models.

    Raises:
        click.BadParameter: if a model file cannot be found.
        Any exception raised by `models.load_any` is re-raised after the
        failure mark has been printed.
    """
    from kraken.lib import models

    nm = {}  # type: Dict[str, models.TorchSeqRecognizer]
    ign_scripts = model.pop('ignore')
    for k, v in model.items():
        # lookup order: path as given, then ~/.kraken, then LEGACY_MODEL_DIR
        search = [v,
                  os.path.join(click.get_app_dir(APP_NAME), v),
                  os.path.join(LEGACY_MODEL_DIR, v)]
        location = next((loc for loc in search if os.path.isfile(loc)), None)
        if not location:
            raise click.BadParameter('No model for {} found'.format(k))
        message('Loading RNN {}\t'.format(k), nl=False)
        try:
            nm[k] = models.load_any(location, device=ctx.meta['device'])
        except Exception:
            message('\u2717', fg='red')
            # propagate the original error so the cause stays visible
            raise
        message('\u2713', fg='green')

    if 'default' in nm:
        from collections import defaultdict

        # unknown identifiers fall back to the default model
        nn = defaultdict(lambda: nm['default'])  # type: Dict[str, models.TorchSeqRecognizer]
        nn.update(nm)
        nm = nn

    # thread count is global so setting it once is sufficient. Index `nm`,
    # which always exists; `nn` is only bound when a default model is set.
    nm[k].nn.set_num_threads(threads)

    # set output mode
    ctx.meta['mode'] = serializer
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=nm,
                   pad=pad,
                   no_segmentation=no_segmentation,
                   bidi_reordering=reorder,
                   script_ignore=ign_scripts,
                   lines=lines)
def ocr(ctx, model, pad, reorder, serialization, text_direction, lines, conv):
    """
    Recognizes text in line images.

    The models are located and loaded here once to spare each worker the
    overhead. Each one is searched for under the path as given, then in the
    application directory (~/.kraken), then in LEGACY_MODEL_DIR.

    Args:
        ctx: click context; 'mode' and 'text_direction' are written to
            ctx.meta.
        model (dict): maps an identifier to a model file name. A 'default'
            entry, if present, becomes the fallback for unknown identifiers.
        pad, lines: forwarded to `recognizer`.
        reorder: forwarded to `recognizer` as `bidi_reordering`.
        serialization: stored as ctx.meta['mode'].
        text_direction: stored as ctx.meta['text_direction'].
        conv: if True, a previously converted `.pronn` copy in the
            application directory is preferred, and every loaded model of
            kind 'pyrnn' is converted and written there.

    Returns:
        functools.partial of `recognizer` bound to the loaded models.

    Raises:
        click.BadParameter: if a model file cannot be found.
        Any exception raised by `models.load_any` is re-raised after the
        failure mark has been printed.
    """
    app_dir = click.get_app_dir(APP_NAME)
    nm = {}
    for k, v in model.items():
        search = [v,
                  os.path.join(app_dir, v),
                  os.path.join(LEGACY_MODEL_DIR, v)]
        # if automatic conversion is enabled we look for a converted model
        # in ~/.kraken first
        if conv is True:
            search.insert(0, os.path.join(app_dir,
                                          os.path.basename(os.path.splitext(v)[0]) + '.pronn'))
        location = next((loc for loc in search if os.path.isfile(loc)), None)
        if not location:
            raise click.BadParameter('No model for {} found'.format(k))
        message('Loading RNN {}\t'.format(k), nl=False)
        try:
            rnn = models.load_any(location.encode('utf-8'))
            nm[k] = rnn
        except Exception:
            message(u'\u2717', fg='red')
            # propagate the original error so the cause stays visible
            raise
        message(u'\u2713', fg='green')

        # convert input model to protobuf
        if conv and rnn.kind == 'pyrnn':
            name, _ = os.path.splitext(os.path.basename(v))
            # NOTE(review): encoded name + str suffix only concatenates on
            # Python 2 -- confirm the target interpreter before porting
            op = os.path.join(app_dir, name.encode('utf-8') + '.pronn')
            try:
                os.makedirs(app_dir)
            except OSError:
                # an already existing directory is fine, anything else is not
                if not os.path.isdir(app_dir):
                    raise
            models.pyrnn_to_pronn(rnn, op)

    if 'default' in nm:
        # unknown identifiers fall back to the default model
        nn = defaultdict(lambda: nm['default'])
        nn.update(nm)
        nm = nn

    # set output mode
    ctx.meta['mode'] = serialization
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=nm,
                   pad=pad,
                   bidi_reordering=reorder,
                   lines=lines)
def ocr(ctx, model, pad, reorder, no_segmentation, serializer, text_direction, lines, threads):
    """
    Recognizes text in line images.

    Every model named in `model` is searched for under the path as given,
    then in the application directory (~/.kraken), then in LEGACY_MODEL_DIR,
    and loaded onto ctx.meta['device'].

    Args:
        ctx: click context; ctx.meta['device'] is read, 'mode' and
            'text_direction' are written.
        model (dict): maps an identifier to a model file name. It must
            contain an 'ignore' entry, which is removed from the dict and
            forwarded as `script_ignore`. A 'default' entry, if present,
            becomes the fallback for unknown identifiers.
        pad, no_segmentation, lines: forwarded to `recognizer`.
        reorder: forwarded to `recognizer` as `bidi_reordering`.
        serializer: stored as ctx.meta['mode'].
        text_direction: stored as ctx.meta['text_direction'].
        threads: thread count set on the network.

    Returns:
        functools.partial of `recognizer` bound to the loaded models.

    Raises:
        click.BadParameter: if a model file cannot be found.
        Any exception raised by `models.load_any` is re-raised after the
        failure mark has been printed.
    """
    from kraken.lib import models

    app_dir = click.get_app_dir(APP_NAME)
    nm = {}  # type: Dict[str, models.TorchSeqRecognizer]
    ign_scripts = model.pop('ignore')
    for k, v in model.items():
        # lookup order: path as given, then ~/.kraken, then LEGACY_MODEL_DIR
        search = [v, os.path.join(app_dir, v), os.path.join(LEGACY_MODEL_DIR, v)]
        location = None
        for loc in search:
            if os.path.isfile(loc):
                location = loc
                break
        if not location:
            raise click.BadParameter('No model {} for {} found'.format(v, k))
        message('Loading RNN {}\t'.format(k), nl=False)
        try:
            rnn = models.load_any(location, device=ctx.meta['device'])
            nm[k] = rnn
        except Exception:
            message('\u2717', fg='red')
            # propagate the original error so the cause stays visible
            raise
        message('\u2713', fg='green')

    if 'default' in nm:
        from collections import defaultdict

        # unknown identifiers fall back to the default model
        nn = defaultdict(lambda: nm['default'])  # type: Dict[str, models.TorchSeqRecognizer]
        nn.update(nm)
        nm = nn

    # thread count is global so setting it once is sufficient. Index `nm`,
    # which always exists; `nn` is only bound when a default model is set.
    nm[k].nn.set_num_threads(threads)

    # set output mode
    ctx.meta['mode'] = serializer
    ctx.meta['text_direction'] = text_direction
    return partial(recognizer,
                   model=nm,
                   pad=pad,
                   no_segmentation=no_segmentation,
                   bidi_reordering=reorder,
                   script_ignore=ign_scripts,
                   lines=lines)
def ocr(ctx, model, pad, hocr, lines, conv):
    """
    Recognizes text in line images.

    The model is located and loaded here once to spare each worker the
    overhead. It is searched for under the path as given, then in the
    application directory (~/.kraken), then in LEGACY_MODEL_DIR.

    Args:
        ctx: click context; ctx.meta['mode'] is written.
        model: file name or path of the recognition model.
        pad, lines: forwarded to `recognizer`.
        hocr: selects output mode 'hocr' when truthy, 'text' otherwise.
        conv: if True, a previously converted copy of the model in the
            application directory is preferred, and a loaded model of kind
            'pyrnn' is converted and written there.

    Returns:
        functools.partial of `recognizer` bound to the loaded model.

    Raises:
        click.BadParameter: if no candidate location is an existing file.
        Any exception raised by `models.load_any` is re-raised after the
        failure mark has been printed.
    """
    app_dir = click.get_app_dir(APP_NAME)
    search = [model,
              os.path.join(app_dir, model),
              os.path.join(LEGACY_MODEL_DIR, model)]
    # if automatic conversion is enabled we look for a converted model in
    # ~/.kraken first
    if conv is True:
        stem = os.path.basename(os.path.splitext(model)[0])
        # Converted models are written with a `.pronn` suffix below, so that
        # is the suffix to look for. `.hdf5` stays in the list as a fallback
        # because it is what this lookup searched for previously.
        search[0:0] = [os.path.join(app_dir, stem + '.pronn'),
                       os.path.join(app_dir, stem + '.hdf5')]

    location = next((loc for loc in search if os.path.isfile(loc)), None)
    if not location:
        raise click.BadParameter('No model found')

    click.echo('Loading RNN\t', nl=False)
    try:
        rnn = models.load_any(location)
    except Exception:
        click.secho(u'\u2717', fg='red')
        # propagate the original error so the cause stays visible
        raise
    click.secho(u'\u2713', fg='green')

    # convert input model to protobuf
    if conv and rnn.kind == 'pyrnn':
        name, _ = os.path.splitext(os.path.basename(model))
        op = os.path.join(app_dir, name + '.pronn')
        try:
            os.makedirs(app_dir)
        except OSError:
            # an already existing directory is fine, anything else is not
            if not os.path.isdir(app_dir):
                raise
        models.pyrnn_to_pronn(rnn, op)

    # set output mode
    ctx.meta['mode'] = 'hocr' if hocr else 'text'
    return partial(recognizer, model=rnn, pad=pad, lines=lines)
def cli(format_type, model, output, files):
    """
    A script producing overlays of lines and regions from either ALTO or
    PageXML files or run a model to do the same.

    Args:
        format_type: 'xml', 'alto' or anything else (treated as PageXML);
            selects the parser applied to each input file.
        model: location of the model handed to `models.load_any`.
        output: 'overlay' to write a PNG with the aligned cuts drawn over
            the page image; any other value is passed to
            `serialization.serialize` as the template name and the result is
            written to an XML file.
        files: input documents. With no files the help text is printed and
            the command exits.
    """
    if len(files) == 0:
        ctx = click.get_current_context()
        click.echo(ctx.get_help())
        ctx.exit()

    from PIL import Image, ImageDraw

    from kraken.lib import models, xml
    from kraken import align, serialization

    if format_type == 'xml':
        fn = xml.parse_xml
    elif format_type == 'alto':
        # the ALTO parser is `parse_alto`; `parse_palto` does not exist
        fn = xml.parse_alto
    else:
        fn = xml.parse_page

    click.echo(f'Loading model {model}')
    net = models.load_any(model)

    for doc in files:
        click.echo(f'Processing {doc} ', nl=False)
        data = fn(doc)
        im = Image.open(data['image']).convert('RGBA')
        records = align.forced_align(data, net)
        if output == 'overlay':
            # draw every cut polygon on a transparent layer, then composite
            tmp = Image.new('RGBA', im.size, (0, 0, 0, 0))
            draw = ImageDraw.Draw(tmp)
            for record in records:
                for pol in record.cuts:
                    c = next(cmap)
                    draw.polygon([tuple(x) for x in pol], fill=c, outline=c[:3])
            base_image = Image.alpha_composite(im, tmp)
            base_image.save(f'high_{os.path.basename(doc)}_algn.png')
        else:
            with open(f'{os.path.basename(doc)}_algn.xml', 'w') as fp:
                fp.write(serialization.serialize(records,
                                                 image_name=data['image'],
                                                 regions=data['regions'],
                                                 template=output))
        click.secho('\u2713', fg='green')
def ensureLoaded(self):
    """Return the model, loading and caching it on first use.

    The model is read from the engine's configured `modelPath`. All warnings
    raised while loading are suppressed.
    """
    if self.model is not None:
        return self.model

    engine = self.engine
    info = engine.tm.info
    modelPath = engine.C.modelPath

    info(f"Loading for Kraken: {unexpanduser(modelPath)}", force=True)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        loaded = load_any(modelPath)

    info("model loaded", force=True)
    self.model = loaded
    return self.model
def transcription(ctx, text_direction, scale, maxcolseps, black_colseps, font, font_style, prefill, output, images, segment_page):
    """
    Builds a transcription interface from page images and writes it to
    `output`.

    Each image is binarized if it is not bitonal. With `segment_page` set the
    page is segmented by `pageseg.segment`; otherwise the whole image is
    treated as a single line box. If `prefill` is given it is loaded as a
    recognition model and every page is added together with the recognition
    records produced by `rpred.rpred`.

    Args:
        ctx: click context (not read in this function).
        text_direction, scale, maxcolseps, black_colseps: passed through to
            `pageseg.segment`.
        font, font_style: passed to `transcribe.TranscriptionInterface`.
        prefill: model location or a falsy value to skip recognition.
        output: object handed to `ti.write`; its `name` is logged.
        images: iterable of open image files; each one is closed after use.
        segment_page: whether to run the page segmenter.
    """
    ti = transcribe.TranscriptionInterface(font, font_style)
    if prefill:
        logger.info('Loading model {}'.format(prefill))
        spin('Loading RNN')
        # from here on `prefill` is the loaded model, not its location
        prefill = models.load_any(prefill.encode('utf-8'))
        message(u'\b\u2713', fg='green', nl=False)
        # presumably re-shows the cursor hidden by spin() -- TODO confirm
        message('\033[?25h\n', nl=False)
    for fp in images:
        logger.info('Reading {}'.format(fp.name))
        spin('Reading images')
        im = Image.open(fp)
        if not binarization.is_bitonal(im):
            logger.info(u'Binarizing page')
            im = binarization.nlbin(im)
        if segment_page:
            logger.info(u'Segmenting page')
            res = pageseg.segment(im, text_direction, scale, maxcolseps, black_colseps)
        else:
            # a single box spanning the full image: (0, 0, width, height)
            res = {
                'text_direction': 'horizontal-tb',
                'boxes': [(0, 0) + im.size]
            }
        if prefill:
            it = rpred.rpred(prefill, im, res)
            preds = []
            # collect all records of the page before adding it
            for pred in it:
                logger.info('{}'.format(pred.prediction))
                spin('Recognizing')
                preds.append(pred)
            message(u'\b\u2713', fg='green', nl=False)
            message('\033[?25h\n', nl=False)
            ti.add_page(im, res, records=preds)
        else:
            ti.add_page(im, res)
        fp.close()
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
    logger.info(u'Writing transcription to {}'.format(output.name))
    spin('Writing output')
    ti.write(output)
    message(u'\b\u2713', fg='green', nl=False)
    message('\033[?25h\n', nl=False)
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps, font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.

    Every image is converted to RGB if its mode is not one of '1', 'L', 'P'
    or 'RGB', binarized, and either segmented with `pageseg.segment` or
    paired with the JSON segmentation read from `lines`. If `prefill` is
    given it is loaded as a recognition model and pages are added together
    with the records produced by `rpred.rpred`.

    Args:
        ctx: click context (not read in this function).
        text_direction, scale, maxcolseps, black_colseps, pad: passed
            through to `pageseg.segment`.
        bw: not used in this function.
        font, font_style: passed to `transcribe.TranscriptionInterface`.
        prefill: model location or a falsy value to skip recognition.
        lines: location of a JSON segmentation file, or a falsy value to run
            the segmenter. Incompatible with more than one image.
        output: object handed to `ti.write`; its `name` is logged.
        images: open image files; each one is closed after use.

    Raises:
        click.UsageError: if `lines` is combined with multiple images or
            the segmentation file is not valid JSON.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError('--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        # from here on `prefill` is the loaded model, not its location
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning('Input {} is in {} color mode. Converting to RGB'.format(fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
            else:
                # A separate name keeps the image handle `fp` intact, so the
                # close() at the end of the loop body closes the image and
                # not the segmentation file.
                with open_file(lines, 'r') as seg_fp:
                    try:
                        res = json.load(cast(IO[Any], seg_fp))
                    except ValueError as e:
                        raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()

    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
def detect_scripts(im, bounds, model=pkg_resources.resource_filename(__name__, 'script.mlmodel'), valid_scripts=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of
    scripts/writing systems.

    NOTE: the function currently raises NotImplementedError as its first
    statement; everything after that is unreachable and kept for reference.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model or None for default.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script segmentation
        of a single line. Script is a ISO15924 4 character identifier.

    Raises:
        NotImplementedError: always, while the function is disabled.
        KrakenInvalidModelException if no clstm module is available.
    """
    raise NotImplementedError('Temporarily unavailable. Please open a github ticket if you want this fixed sooner.')
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels
    val_scripts = []
    if valid_scripts:
        logger.debug(u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                # labels are code points offset by 0xF0000 from the numeric
                # script identifier
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        valid_scripts = []
    it = rpred(rnn, im, bounds, bidi_reordering=False)
    preds = []
    logger.debug(u'Running detection')
    for pred, bbox in zip(it, bounds['boxes']):
        # substitute inherited scripts with neighboring runs
        def _subs(m, s, r=False):
            # With r=False every character found in `m` is replaced by the
            # previously emitted character; with r=True every character NOT
            # found in `m` is. A leading character is always kept as-is.
            p = u''
            for c in s:
                if c in m and p and not r:
                    p += p[-1]
                elif c not in m and p and r:
                    p += p[-1]
                else:
                    p += c
            return p

        logger.debug(u'Substituting scripts')
        p = _subs([u'\U000f03e2', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(reversed(_subs([u'\U000f03e2', u'\U000f03e6'], reversed(p))))
        # group by valid scripts. two steps: 1. substitute common confusions
        # (Latin->Fraktur and Syriac->Arabic) if given in script list.
        if 'Arab' in valid_scripts and 'Syrc' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f0087', u'\U000f00a0')
        if 'Latn' in valid_scripts and 'Latf' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f00d9', u'\U000f00d7')
        # next merge adjacent scripts
        if val_scripts:
            pred.prediction = _subs(val_scripts, pred.prediction, r=True)

        # group by grapheme
        t = []
        logger.debug(u'Merging detections')
        # if line contains only a single script return whole line bounding box
        if len(set(pred.prediction)) == 1:
            logger.debug('Only one script on line. Emitting whole line bbox')
            k = ord(pred.prediction[0]) - 0xF0000
            t.append((n2s[str(k)], bbox))
        else:
            for k, g in groupby(pred, key=lambda x: x[0]):
                # convert to ISO15924 numerical identifier
                k = ord(k) - 0xF0000
                b = max_bbox(x[1] for x in g)
                t.append((n2s[str(k)], b))
        preds.append(t)
    return {'boxes': preds, 'text_direction': bounds['text_direction'], 'script_detection': True}
def transcription(ctx, text_direction, scale, bw, maxcolseps, black_colseps, font, font_style, prefill, pad, lines, output, images):
    """
    Creates transcription environments for ground truth generation.

    Every image is converted to RGB if its mode is not one of '1', 'L', 'P'
    or 'RGB', binarized, and either segmented with `pageseg.segment` or
    paired with the JSON segmentation read from `lines`. If `prefill` is
    given it is loaded as a recognition model and pages are added together
    with the records produced by `rpred.rpred`.

    Args:
        ctx: click context (not read in this function).
        text_direction, scale, maxcolseps, black_colseps, pad: passed
            through to `pageseg.segment`.
        bw: not used in this function.
        font, font_style: passed to `transcribe.TranscriptionInterface`.
        prefill: model location or a falsy value to skip recognition.
        lines: location of a JSON segmentation file, or a falsy value to run
            the segmenter. Incompatible with more than one image.
        output: object handed to `ti.write`; its `name` is logged.
        images: open image files; each one is closed after use.

    Raises:
        click.UsageError: if `lines` is combined with multiple images or
            the segmentation file is not valid JSON.
    """
    from PIL import Image

    from kraken import rpred
    from kraken import pageseg
    from kraken import transcribe
    from kraken import binarization

    from kraken.lib import models
    from kraken.lib.util import is_bitonal

    ti = transcribe.TranscriptionInterface(font, font_style)

    if len(images) > 1 and lines:
        raise click.UsageError(
            '--lines option is incompatible with multiple image files')

    if prefill:
        logger.info('Loading model {}'.format(prefill))
        message('Loading RNN', nl=False)
        # from here on `prefill` is the loaded model, not its location
        prefill = models.load_any(prefill)
        message('\u2713', fg='green')

    with log.progressbar(images, label='Reading images') as bar:
        for fp in bar:
            logger.info('Reading {}'.format(fp.name))
            im = Image.open(fp)
            if im.mode not in ['1', 'L', 'P', 'RGB']:
                logger.warning(
                    'Input {} is in {} color mode. Converting to RGB'.format(
                        fp.name, im.mode))
                im = im.convert('RGB')
            logger.info('Binarizing page')
            im_bin = binarization.nlbin(im)
            im_bin = im_bin.convert('1')
            logger.info('Segmenting page')
            if not lines:
                res = pageseg.segment(im_bin,
                                      text_direction,
                                      scale,
                                      maxcolseps,
                                      black_colseps,
                                      pad=pad)
            else:
                # A separate name keeps the image handle `fp` intact, so the
                # close() at the end of the loop body closes the image and
                # not the segmentation file.
                with open_file(lines, 'r') as seg_fp:
                    try:
                        res = json.load(cast(IO[Any], seg_fp))
                    except ValueError as e:
                        raise click.UsageError(
                            '{} invalid segmentation: {}'.format(
                                lines, str(e)))
            if prefill:
                it = rpred.rpred(prefill, im_bin, res)
                preds = []
                logger.info('Recognizing')
                for pred in it:
                    logger.debug('{}'.format(pred.prediction))
                    preds.append(pred)
                ti.add_page(im, res, records=preds)
            else:
                ti.add_page(im, res)
            fp.close()

    logger.info('Writing transcription to {}'.format(output.name))
    message('Writing output', nl=False)
    ti.write(output)
    message('\u2713', fg='green')
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    The TEI segmentation in `doc` is loaded, its existing word and grapheme
    boxes are cleared, and every line is recognized. Recognized text is split
    on whitespace; each non-empty run of text or whitespace becomes one
    segment holding its graphemes with bounding box and integer confidence.

    Args:
        doc (unicode, unicode): The input document tuple
        method (unicode): The suffix string append to all output files
        model (unicode): Identifier for the font model to use

    Returns:
        (unicode, unicode): Storage tuple for the output file

    Raises:
        NidabaInvalidParameterException: if the model cannot be loaded or the
            input image is not bitonal.
    """
    output_path = (doc[0],
                   os.path.splitext(storage.insert_suffix(doc[1], method, model))[0] + '.xml')

    logger.debug('Loading model {}'.format(model))
    # The model is loaded once, here, so that every load failure (including
    # an unknown identifier) is reported as an invalid parameter.
    try:
        rnn = models.load_any(mod_db[model])
    except Exception as e:
        raise NidabaInvalidParameterException(str(e))

    logger.debug('Reading TEI segmentation from {}'.format(doc))
    tei = OCRRecord()
    with storage.StorageFile(*doc) as seg:
        tei.load_tei(seg)

    img = Image.open(storage.get_abs_path(*storage.get_storage_path_url(tei.img)))
    if is_bitonal(img):
        img = img.convert('1')
    else:
        raise NidabaInvalidParameterException('Input image is not bitonal')

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    def _add_run(rec, start, length):
        # adds rec[start:start + length] as one segment with its graphemes
        end = start + length
        tei.add_segment(max_bbox(rec.cuts[start:end]))
        tei.add_graphemes([{'grapheme': x[0],
                            'bbox': x[1],
                            'confidence': int(x[2] * 100)} for x in rec[start:end]])

    logger.debug('Start recognizing characters')
    bounds = {'text_direction': 'horizontal-tb',
              'boxes': [list(x['bbox']) for x in lines.itervalues()]}
    for line_id, rec in izip(lines, rpred.rpred(rnn, img, bounds)):
        # scope the current line and add all graphemes recognized by kraken to
        # it.
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)
        # the capture group keeps the whitespace runs, so even indices are
        # text and odd indices are whitespace
        splits = regex.split(u'(\\s+)', rec.prediction)
        line_offset = 0
        for segment, whitespace in izip_longest(splits[0::2], splits[1::2]):
            if len(segment):
                seg_bbox = max_bbox(rec.cuts[line_offset:line_offset + len(segment)])
                logger.debug('Creating new segment at {} {} {} {}'.format(*seg_bbox))
                logger.debug('Adding graphemes (segment): {}'.format(
                    rec.prediction[line_offset:line_offset + len(segment)]))
                _add_run(rec, line_offset, len(segment))
                line_offset += len(segment)
            # whitespace is None for the last text run (izip_longest fill)
            if whitespace:
                logger.debug('Adding graphemes (whitespace): {}'.format(
                    rec.prediction[line_offset:line_offset + len(whitespace)]))
                _add_run(rec, line_offset, len(whitespace))
                line_offset += len(whitespace)

    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write_tei(fp)
    return output_path
def test_load_any_pyrnn(self):
    """
    load_any has to return a SeqRecognizer for a pickled model.
    """
    model_path = os.path.join(resources, 'model.pyrnn.gz')
    rnn = models.load_any(model_path)
    self.assertIsInstance(rnn, kraken.lib.lstm.SeqRecognizer)
def publish(ctx, metadata, access_token, model):
    """
    Publishes a model on the zenodo model repository.

    If no metadata file is supplied the fields are collected interactively
    and fields failing schema validation are prompted for again until the
    record validates. A supplied file is parsed and validated once.
    """
    import json
    import pkg_resources

    from functools import partial
    from jsonschema import validate
    from jsonschema.exceptions import ValidationError

    from kraken import repo
    from kraken.lib import models

    with pkg_resources.resource_stream(__name__, 'metadata.schema.json') as fp:
        schema = json.load(fp)

    nn = models.load_any(model)

    if not metadata:
        author = click.prompt('author')
        affiliation = click.prompt('affiliation')
        summary = click.prompt('summary')
        description = click.edit(
            'Write long form description (training data, transcription standards) of the model here')

        # take last accuracy measurement in model metadata
        if 'accuracy' in nn.nn.user_metadata and nn.nn.user_metadata['accuracy']:
            accuracy_default = nn.nn.user_metadata['accuracy'][-1][1] * 100
        else:
            accuracy_default = None
        accuracy = click.prompt('accuracy on test set', type=float, default=accuracy_default)

        known_scripts = sorted(schema['properties']['script']['items']['enum'])
        script = [click.prompt('script', type=click.Choice(known_scripts), show_choices=True)]
        known_licenses = sorted(schema['properties']['license']['enum'])
        chosen_license = click.prompt('license', type=click.Choice(known_licenses), show_choices=True)

        metadata = {
            'authors': [{'name': author, 'affiliation': affiliation}],
            'summary': summary,
            'description': description,
            'accuracy': accuracy,
            'license': chosen_license,
            'script': script,
            'name': os.path.basename(model),
            # placeholder, replaced by the model's real alphabet below
            'graphemes': ['a'],
        }

        # keep asking for the offending field until the record validates
        while True:
            try:
                validate(metadata, schema)
            except ValidationError as e:
                message(e.message)
                key = e.path[-1]
                wanted = float if e.schema['type'] == 'number' else str
                metadata[key] = click.prompt(key, type=wanted)
                continue
            break
    else:
        metadata = json.load(metadata)
        validate(metadata, schema)

    alphabet = ''.join(nn.codec.c2l.keys())
    metadata['graphemes'] = list(alphabet)
    oid = repo.publish_model(model, metadata, access_token, partial(message, '.', nl=False))
    print('\nmodel PID: {}'.format(oid))
def test_load_any_pyrnn_py3(self):
    """
    Test load_any doesn't load pickled models on python 3
    """
    pickled_model = os.path.join(resources, 'model.pyrnn.gz')
    # only the call itself matters, the return value is not inspected
    models.load_any(pickled_model)
def test_load_clstm(self):
    """
    Loading a valid clstm file has to yield a TorchSeqRecognizer.
    """
    model_path = resources / 'toy.clstm'
    rnn = models.load_any(model_path)
    self.assertIsInstance(rnn, models.TorchSeqRecognizer)
from kraken.pageseg import segment
from kraken.binarization import nlbin
from kraken.rpred import rpred
from itertools import cycle
from kraken.lib import models

# RGBA fill colors, cycled through so neighboring boxes are told apart
cmap = cycle([
    (230, 25, 75, 127),
    (60, 180, 75, 127),
    (255, 225, 25, 127),
    (0, 130, 200, 127),
    (245, 130, 48, 127),
    (145, 30, 180, 127),
    (70, 240, 240, 127),
])

# first argument is the model, all remaining ones are page images
net = models.load_any(sys.argv[1])

for fname in sys.argv[2:]:
    im = Image.open(fname)
    print(fname)
    im = nlbin(im)
    res = segment(im, maxcolseps=0)
    pred = rpred(net, im, res)
    im = im.convert('RGBA')

    # draw all cut boxes on a transparent layer the size of the page
    overlay = Image.new('RGBA', im.size, (0, 0, 0, 0))
    canvas = ImageDraw.Draw(overlay)
    for line in pred:
        for box in line.cuts:
            canvas.rectangle(box, fill=next(cmap))

    im = Image.alpha_composite(im, overlay)
    out_name = 'high_{}'.format(os.path.basename(fname))
    im.save(out_name)
def test_load_any_proto(self):
    """
    load_any has to return a TorchSeqRecognizer for a protobuf model.
    """
    model_path = resources / 'model.pronn'
    rnn = models.load_any(model_path)
    self.assertIsInstance(rnn, kraken.lib.models.TorchSeqRecognizer)
def test_load_invalid(self):
    """
    Hands load_any the test's temporary file instead of a real model.
    """
    invalid_path = self.temp.name
    models.load_any(invalid_path)
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (tuple): Pair of storage tuples; ``doc[0]`` is read as the TEI
                     segmentation and ``doc[1]`` is opened as the image.
        method (unicode): The suffix string append to all output files
        model (unicode): Identifier for the font model to use; looked up in
                         the ``kraken_models`` and then the
                         ``ocropus_models`` configuration section.

    Returns:
        (unicode, unicode): Storage tuple for the output file

    Raises:
        NidabaInvalidParameterException: if the identifier is found in
                                         neither configuration section.
    """
    image_path = storage.get_abs_path(*doc[1])
    xml_name = os.path.splitext(storage.insert_suffix(doc[1][1], method, model))[0] + '.xml'
    output_path = (doc[1][0], xml_name)

    logger.debug('Searching for model {}'.format(model))
    for section in ('kraken_models', 'ocropus_models'):
        if model in nidaba_cfg[section]:
            model = storage.get_abs_path(*(nidaba_cfg[section][model]))
            break
    else:
        raise NidabaInvalidParameterException('Model not defined in '
                                              'configuration')

    img = Image.open(image_path)
    logger.debug('Reading TEI segmentation from {}'.format(doc[1]))
    tei = TEIFacsimile()
    with storage.StorageFile(*doc[0]) as seg:
        tei.read(seg)

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Loading model {}'.format(model))
    rnn = models.load_any(model)

    def _graphemes(records):
        # confidence is stored as a truncated integer percentage
        return [(x[0], x[1], int(x[2] * 100)) for x in records]

    logger.debug('Start recognizing characters')
    boxes = [(int(x[0]), int(x[1]), int(x[2]), int(x[3])) for x in lines]
    for idx, rec in enumerate(rpred.rpred(rnn, img, boxes)):
        # scope the current line and add all graphemes recognized by kraken
        # to it; the fifth field of a line entry is used as its identifier
        line_id = lines[idx][4]
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)

        # the capturing group keeps the whitespace runs, so even entries
        # are text segments and odd entries are the separators
        tokens = regex.split(u'(\s+)', rec.prediction)
        pos = 0
        for segment, whitespace in izip_longest(tokens[0::2], tokens[1::2]):
            if len(segment):
                end = pos + len(segment)
                seg_bbox = max_bbox(rec.cuts[pos:end])
                logger.debug('Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(rec.prediction[pos:end]))
                tei.add_graphemes(_graphemes(rec[pos:end]))
                pos = end
            if whitespace:
                end = pos + len(whitespace)
                logger.debug('Adding graphemes (whitespace): {}'.format(rec.prediction[pos:end]))
                seg_bbox = max_bbox(rec.cuts[pos:end])
                tei.add_segment(seg_bbox)
                tei.add_graphemes(_graphemes(rec[pos:end]))
                pos = end

    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write(fp)
    return output_path
def test_load_any_proto(self):
    """
    Test load_any loads protobuf models.
    """
    proto_path = os.path.join(resources, 'model.pronn')
    loaded = models.load_any(proto_path)
    self.assertIsInstance(loaded, kraken.lib.lstm.SeqRecognizer)
def simple_example():
    """
    Run binarization, segmentation and recognition on one hard-coded image.

    The image is opened from a platform-dependent dataset directory and
    passed through the `binarizer`, `segmenter` and `recognizer` helpers
    (defined outside this function). Recognition uses the model file named
    by `DEFAULT_MODEL`, registered under the 'default' key.

    Returns:
        Whatever `recognizer` returns, or None when the image cannot be
        opened or no default model could be loaded.
    """
    if 'posix' == os.name:
        data_dir_path = '/home/sangwook/work/dataset'
    else:
        data_dir_path = 'D:/work/dataset'
    image_filepath = data_dir_path + '/text/receipt_epapyrus/keit_20190619/크기변환_카드영수증_5-1.png'
    #image_filepath = data_dir_path + '/text/receipt_epapyrus/epapyrus_20190618/receipt_1/img01.jpg'

    try:
        input_image = Image.open(image_filepath)
    except IOError:
        print('Failed to load an image, {}.'.format(image_filepath))
        return

    #--------------------
    # Binarization parameters, passed positionally to binarizer().
    threshold = 0.5
    zoom = 0.5
    escale = 1.0
    border = 0.1
    perc = 80  # [1, 100].
    range = 20  # NOTE: shadows the builtin range() for the rest of this function.
    low = 5  # [1, 100].
    high = 90  # [1, 100].
    binary = binarizer(input_image, threshold, zoom, escale, border, perc, range, low, high)

    #--------------------
    # Segmentation parameters, passed positionally to segmenter().
    text_direction = 'horizontal-lr'  # Sets principal text direction. {'horizontal-lr', 'horizontal-rl', 'vertical-lr', 'vertical-rl'}.
    script_detect = False  # Enable script detection on segmenter output.
    allowed_scripts = None  # List of allowed scripts in script detection output. Ignored if disabled.
    scale = None
    maxcolseps = 2
    black_colseps = False
    remove_hlines = True
    pad = (0, 0)  # Left and right padding around lines.
    mask_filepath = None  # Segmentation mask suppressing page areas for line detection. 0-valued image regions are ignored for segmentation purposes. Disables column detection.
    segments = segmenter(binary, text_direction, script_detect, allowed_scripts, scale, maxcolseps, black_colseps, remove_hlines, pad, mask_filepath)
    # segments.keys() = ['text_direction', 'boxes', 'script_detection'].

    #--------------------
    # Visualize bounding boxes (disabled; flip the condition to enable).
    if False:
        import cv2
        rgb = cv2.imread(image_filepath, cv2.IMREAD_COLOR)
        if rgb is None:
            print('Failed to load an image file, {}.'.format(image_filepath))
            return
        else:
            for bbox in segments['boxes']:
                x0, y0, x1, y1 = bbox
                cv2.rectangle(rgb, (x0, y0), (x1, y1), (0, 0, 255), 1, cv2.LINE_AA)
            cv2.imshow('Image', rgb)
            cv2.waitKey(0)

    #--------------------
    # Download model.
    #   kraken.py get 10.5281/zenodo.2577813
    #   python kraken.py get 10.5281/zenodo.2577813
    # Model directories mentioned by the original author:
    #   ~/.config/kraken
    #   ~/.kraken
    #   /usr/local/share/ocropus
    #DEFAULT_MODEL = 'en-default.mlmodel'
    DEFAULT_MODEL = './en_best.mlmodel'
    #model = DEFAULT_MODEL  # Path to an recognition model or mapping of the form $script1:$model1. Add multiple mappings to run multi-model recognition based on detected scripts. Use the default keyword for adding a catch-all model. Recognition on scripts can be ignored with the model value ignore.
    pad = 16  # Left and right padding around lines.
    reorder = True  # Reorder code points to logical order.
    no_segmentation = False  # Enables non-segmentation mode treating each input image as a whole line.
    serializer = 'text'  # Switch between hOCR, ALTO, and plain text output. {'hocr', 'alto', 'abbyyxml', 'text'}.
    text_direction = 'horizontal-tb'  # Sets principal text direction in serialization output. {'horizontal-tb', 'vertical-lr', 'vertical-rl'}.
    #lines = 'lines.json'  # JSON file containing line coordinates.
    threads = 1  # Number of threads to use for OpenMP parallelization.
    device = 'cpu'  # Select device to use (cpu, cuda:0, cuda:1, ...).

    model_dict = {'ignore': []}  # type: Dict[str, Union[str, List[str]]]
    model_dict['default'] = DEFAULT_MODEL

    nm = {}  # type: Dict[str, models.TorchSeqRecognizer].
    ign_scripts = model_dict.pop('ignore')
    for k, v in model_dict.items():
        # Only an existing file is accepted; no model directories are searched.
        location = None
        if os.path.isfile(v):
            location = v
        if not location:
            print('No model {} for {} found.'.format(v, k))
            continue
        try:
            rnn = models.load_any(location, device=device)
            nm[k] = rnn
        except Exception:
            # Any loading failure is reported and the entry is skipped.
            print('Model loading error, {}.'.format(location))
            continue

    if 'default' in nm:
        # Keys without their own model fall back to the default model.
        from collections import defaultdict
        nn = defaultdict(lambda: nm['default'])  # type: Dict[str, models.TorchSeqRecognizer].
        nn.update(nm)
        nm = nn
    else:
        print('No default model.')
        return
    # Thread count is global so setting it once is sufficient.
    # `k` is the last key left over from the loading loop above.
    nn[k].nn.set_num_threads(threads)

    return recognizer(input_image, model=nm, pad=pad, no_segmentation=no_segmentation, bidi_reordering=reorder, script_ignore=ign_scripts, mode=serializer, text_direction=text_direction, segments=segments)
def test_load_any_invalid(self):
    """
    Test load_any raises the proper exception if object is neither pickle
    nor protobuf.
    """
    # self.temp is prepared outside this method; only its path is used here
    bogus_path = self.temp.name
    models.load_any(bogus_path)
import cv2
import numpy as np
import pandas as pd
import random
from kraken.lib.models import load_any
from kraken import rpred, binarization
from PIL import Image
from subprocess import call
from imutils import contours
import argparse
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

## ---Loading Kraken Model---
# Loaded once at import time; the path is relative to the working directory.
model = load_any("en-default.mlmodel")


def preprocessing_non_tabular(path):
    """
    Binarize and threshold the image at `path`.

    Writes an inverted Otsu threshold of the grayscale image to
    "processed_image/threshold.png". The visible part of this function
    returns nothing and does not use the nlbin result any further.
    NOTE(review): the function looks like an excerpt; confirm against the
    full source.
    """
    img = cv2.imread(path)
    ## ---Binarization of image---
    genrator_image = Image.fromarray(img)
    genrator_image = binarization.nlbin(genrator_image)
    # ----Grayscaling Image----
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # --- performing Otsu threshold ---
    ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    cv2.imwrite("processed_image/threshold.png", thresh1)
#! /usr/bin/env python
# Recognize every PNG in a directory as one whole text line.
#
# usage: <script> MODEL [DIRECTORY]
#   MODEL      path of the recognition model to load
#   DIRECTORY  directory containing the *.png line images (default: '.')
#
# For each image `x.png` the prediction is written UTF-8 encoded to
# `x.png.rec.txt` next to it.
from kraken.lib import models
from kraken import rpred
from PIL import Image
from glob import glob
import sys

model = sys.argv[1]
gt = sys.argv[2] if len(sys.argv) > 2 else '.'
rnn = models.load_any(model)
ims = glob(gt + '/*.png')
for f in ims:
    print(f)
    im = Image.open(f)
    # a single box spanning the whole image: each file is one line
    it = rpred.rpred(rnn, im, [(0, 0) + im.size])
    with open(f + '.rec.txt', 'wb') as fp:
        # builtin next() works on Python 2 and 3; the `.next()` method used
        # before only exists on Python 2 iterators
        fp.write(next(it).prediction.encode('utf-8'))
def test(ctx, model, evaluation_files, device, pad, threads, test_set):
    """
    Evaluate on a test set.
    """
    # The docstring is kept short on purpose: if this function is registered
    # as a click command it doubles as the command's help text.
    #
    # model:            iterable of model paths; each one is loaded and
    #                   evaluated separately.
    # evaluation_files: additional line image paths appended to test_set.
    # device:           device identifier forwarded to models.load_any.
    # pad:              forwarded to generate_input_transforms.
    # threads:          number of OpenMP threads.
    # test_set:         line image paths; the ground truth of `x.png` is
    #                   read from `x.gt.txt`.
    #
    # Raises click.UsageError when no model or no evaluation data is given.
    if not model:
        raise click.UsageError('No model to evaluate given.')

    import numpy as np
    from PIL import Image

    from kraken.serialization import render_report
    from kraken.lib import models
    from kraken.lib.dataset import global_align, compute_confusions, generate_input_transforms

    logger.info('Building test set from {} line images'.format(
        len(test_set) + len(evaluation_files)))

    nn = {}
    for p in model:
        message('Loading model {}\t'.format(p), nl=False)
        # honour the requested device instead of silently ignoring it
        nn[p] = models.load_any(p, device=device)
        message('\u2713', fg='green')

    test_set = list(test_set)

    # set number of OpenMP threads (the setting is applied through the
    # first loaded model only)
    logger.debug('Set OpenMP threads to {}'.format(threads))
    next(iter(nn.values())).nn.set_num_threads(threads)

    # merge evaluation_files into the test set
    if evaluation_files:
        test_set.extend(evaluation_files)

    if len(test_set) == 0:
        raise click.UsageError(
            'No evaluation data was provided to the test command. Use `-e` or the `test_set` argument.'
        )

    def _get_text(im):
        # ground truth lives next to the image with a `.gt.txt` suffix
        with open(os.path.splitext(im)[0] + '.gt.txt', 'r') as fp:
            return get_display(fp.read())

    acc_list = []
    for p, net in nn.items():
        algn_gt: List[str] = []
        algn_pred: List[str] = []
        chars = 0
        error = 0
        message('Evaluating {}'.format(p))
        logger.info('Evaluating {}'.format(p))
        batch, channels, height, width = net.nn.input
        ts = generate_input_transforms(batch, height, width, channels, pad)
        with log.progressbar(test_set, label='Evaluating') as bar:
            for im_path in bar:
                i = ts(Image.open(im_path))
                text = _get_text(im_path)
                pred = net.predict_string(i)
                chars += len(text)
                c, algn1, algn2 = global_align(text, pred)
                algn_gt.extend(algn1)
                algn_pred.extend(algn2)
                error += c
        # per-model character accuracy over the whole test set
        acc_list.append((chars - error) / chars)
        confusions, scripts, ins, dels, subs = compute_confusions(
            algn_gt, algn_pred)
        rep = render_report(p, chars, error, confusions, scripts, ins, dels,
                            subs)
        logger.info(rep)
        message(rep)

    logger.info('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(
        np.mean(acc_list) * 100, np.std(acc_list) * 100))
    message('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(
        np.mean(acc_list) * 100, np.std(acc_list) * 100))
def ocr_kraken(doc, method=u'ocr_kraken', model=None):
    """
    Runs kraken on an input document and writes a TEI file.

    Args:
        doc (tuple): Pair of storage tuples; ``doc[0]`` is loaded as the TEI
                     segmentation and ``doc[1]`` is opened as the image.
        method (unicode): The suffix string append to all output files
        model (unicode): Identifier for the font model to use; looked up in
                         the ``kraken_models`` and then the
                         ``ocropus_models`` configuration section.

    Returns:
        (unicode, unicode): Storage tuple for the output file

    Raises:
        NidabaInvalidParameterException: if the identifier is found in
                                         neither configuration section.
    """
    image_path = storage.get_abs_path(*doc[1])
    xml_name = os.path.splitext(storage.insert_suffix(doc[1][1], method, model))[0] + '.xml'
    output_path = (doc[1][0], xml_name)

    logger.debug('Searching for model {}'.format(model))
    for section in ('kraken_models', 'ocropus_models'):
        if model in nidaba_cfg[section]:
            model = storage.get_abs_path(*(nidaba_cfg[section][model]))
            break
    else:
        raise NidabaInvalidParameterException('Model not defined in '
                                              'configuration')

    img = Image.open(image_path)
    logger.debug('Reading TEI segmentation from {}'.format(doc[1]))
    tei = OCRRecord()
    with storage.StorageFile(*doc[0]) as seg:
        tei.load_tei(seg)

    logger.debug('Clearing out word/grapheme boxes')
    # kraken is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('kraken', 'character recognition')
    lines = tei.lines

    logger.debug('Loading model {}'.format(model))
    rnn = models.load_any(model)

    def _graphemes(records):
        # confidence is stored as a truncated integer percentage
        return [{'grapheme': x[0], 'bbox': x[1], 'confidence': int(x[2] * 100)}
                for x in records]

    logger.debug('Start recognizing characters')
    for line_id, rec in zip(lines, rpred.rpred(rnn, img, [x['bbox'] for x in lines.itervalues()])):
        # scope the current line and add all graphemes recognized by kraken
        # to it
        logger.debug('Scoping line {}'.format(line_id))
        tei.scope_line(line_id)

        # the capturing group keeps the whitespace runs, so even entries
        # are text segments and odd entries are the separators
        tokens = regex.split(u'(\s+)', rec.prediction)
        pos = 0
        for segment, whitespace in izip_longest(tokens[0::2], tokens[1::2]):
            if len(segment):
                end = pos + len(segment)
                seg_bbox = max_bbox(rec.cuts[pos:end])
                logger.debug('Creating new segment at {} {} {} {}'.format(*seg_bbox))
                tei.add_segment(seg_bbox)
                logger.debug('Adding graphemes (segment): {}'.format(rec.prediction[pos:end]))
                tei.add_graphemes(_graphemes(rec[pos:end]))
                pos = end
            if whitespace:
                end = pos + len(whitespace)
                logger.debug('Adding graphemes (whitespace): {}'.format(rec.prediction[pos:end]))
                seg_bbox = max_bbox(rec.cuts[pos:end])
                tei.add_segment(seg_bbox)
                tei.add_graphemes(_graphemes(rec[pos:end]))
                pos = end

    with storage.StorageFile(*output_path, mode='wb') as fp:
        logger.debug('Writing TEI to {}'.format(fp.abs_path))
        tei.write_tei(fp)
    return output_path
def detect_scripts(im, bounds, model=pkg_resources.resource_filename(__name__, 'script.mlmodel'), valid_scripts=None):
    """
    Detects scripts in a segmented page.

    Classifies lines returned by the page segmenter into runs of
    scripts/writing systems.

    NOTE: this function is currently disabled. Its first statement raises
    NotImplementedError unconditionally, so everything after it is
    unreachable and only kept for reference.

    Args:
        im (PIL.Image): A bi-level page of mode '1' or 'L'
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the
                       image and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model. The
                     default is the packaged 'script.mlmodel', resolved once
                     when the function is defined.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script
        segmentation of a single line. Script is a ISO15924 4 character
        identifier. (Describes the disabled implementation below.)

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError('Temporarily unavailable. Please open a github ticket if you want this fixed sooner.')
    # ---- unreachable from here on; kept for reference ----
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels: a label is the numerical identifier
    # offset by 0xF0000
    val_scripts = []
    if valid_scripts:
        logger.debug(u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        valid_scripts = []
    it = rpred(rnn, im, bounds, bidi_reordering=False)
    preds = []
    logger.debug(u'Running detection')
    for pred, bbox in zip(it, bounds['boxes']):
        # substitute inherited scripts with neighboring runs
        def _subs(m, s, r=False):
            # With r=False every character of s that IS in m is replaced by
            # the previously emitted character; with r=True every character
            # that is NOT in m is replaced instead. The first character is
            # always kept because there is nothing to copy from yet.
            p = u''
            for c in s:
                if c in m and p and not r:
                    p += p[-1]
                elif c not in m and p and r:
                    p += p[-1]
                else:
                    p += c
            return p

        logger.debug(u'Substituting scripts')
        p = _subs([u'\U000f03e2', u'\U000f03e6'], pred.prediction)
        # do a reverse run to fix leading inherited scripts
        pred.prediction = ''.join(reversed(_subs([u'\U000f03e2', u'\U000f03e6'], reversed(p))))
        # group by valid scripts. two steps: 1. substitute common confusions
        # (Latin->Fraktur and Syriac->Arabic) if given in script list.
        if 'Arab' in valid_scripts and 'Syrc' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f0087', u'\U000f00a0')
        if 'Latn' in valid_scripts and 'Latf' not in valid_scripts:
            pred.prediction = pred.prediction.replace(u'\U000f00d9', u'\U000f00d7')
        # next merge adjacent scripts
        if val_scripts:
            pred.prediction = _subs(val_scripts, pred.prediction, r=True)
        # group by grapheme
        t = []
        logger.debug(u'Merging detections')
        # if line contains only a single script return whole line bounding box
        if len(set(pred.prediction)) == 1:
            logger.debug('Only one script on line. Emitting whole line bbox')
            k = ord(pred.prediction[0]) - 0xF0000
            t.append((n2s[str(k)], bbox))
        else:
            for k, g in groupby(pred, key=lambda x: x[0]):
                # convert to ISO15924 numerical identifier
                k = ord(k) - 0xF0000
                b = max_bbox(x[1] for x in g)
                t.append((n2s[str(k)], b))
        preds.append(t)
    return {'boxes': preds, 'text_direction': bounds['text_direction'], 'script_detection': True}
def test_load_any_pyrnn_py3(self):
    """
    Test load_any doesn't load pickled models on python 3
    """
    legacy_model = resources / 'model.pyrnn.gz'
    # loading must be rejected with KrakenInvalidModelException
    with raises(KrakenInvalidModelException):
        rnn = models.load_any(legacy_model)
def test(ctx, model, evaluation_files, device, pad, threads, test_set):
    """
    Evaluate on a test set.
    """
    # The docstring is kept short on purpose: if this function is registered
    # as a click command it doubles as the command's help text.
    #
    # model:            iterable of model paths; each one is loaded and
    #                   evaluated separately.
    # evaluation_files: additional line image paths appended to test_set.
    # device:           device identifier forwarded to models.load_any.
    # pad:              forwarded to generate_input_transforms.
    # threads:          number of OpenMP threads.
    # test_set:         line image paths; the ground truth of `x.png` is
    #                   read from `x.gt.txt`.
    #
    # Raises click.UsageError when no model or no evaluation data is given.
    if not model:
        raise click.UsageError('No model to evaluate given.')

    import numpy as np
    from PIL import Image

    from kraken.serialization import render_report
    from kraken.lib import models
    from kraken.lib.dataset import global_align, compute_confusions, generate_input_transforms

    logger.info('Building test set from {} line images'.format(len(test_set) + len(evaluation_files)))

    nn = {}
    for p in model:
        message('Loading model {}\t'.format(p), nl=False)
        # honour the requested device instead of silently ignoring it
        nn[p] = models.load_any(p, device=device)
        message('\u2713', fg='green')

    test_set = list(test_set)

    # set number of OpenMP threads (the setting is applied through the
    # first loaded model only)
    logger.debug('Set OpenMP threads to {}'.format(threads))
    next(iter(nn.values())).nn.set_num_threads(threads)

    # merge evaluation_files into the test set
    if evaluation_files:
        test_set.extend(evaluation_files)

    if len(test_set) == 0:
        raise click.UsageError('No evaluation data was provided to the test command. Use `-e` or the `test_set` argument.')

    def _get_text(im):
        # ground truth lives next to the image with a `.gt.txt` suffix
        with open(os.path.splitext(im)[0] + '.gt.txt', 'r') as fp:
            return get_display(fp.read())

    acc_list = []
    for p, net in nn.items():
        algn_gt: List[str] = []
        algn_pred: List[str] = []
        chars = 0
        error = 0
        message('Evaluating {}'.format(p))
        logger.info('Evaluating {}'.format(p))
        batch, channels, height, width = net.nn.input
        ts = generate_input_transforms(batch, height, width, channels, pad)
        with log.progressbar(test_set, label='Evaluating') as bar:
            for im_path in bar:
                i = ts(Image.open(im_path))
                text = _get_text(im_path)
                pred = net.predict_string(i)
                chars += len(text)
                c, algn1, algn2 = global_align(text, pred)
                algn_gt.extend(algn1)
                algn_pred.extend(algn2)
                error += c
        # per-model character accuracy over the whole test set
        acc_list.append((chars-error)/chars)
        confusions, scripts, ins, dels, subs = compute_confusions(algn_gt, algn_pred)
        rep = render_report(p, chars, error, confusions, scripts, ins, dels, subs)
        logger.info(rep)
        message(rep)

    logger.info('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(np.mean(acc_list) * 100, np.std(acc_list) * 100))
    message('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(np.mean(acc_list) * 100, np.std(acc_list) * 100))
def test_load_clstm(self):
    """
    Tests loading of valid clstm files.
    """
    # the path is handed over UTF-8 encoded, i.e. as bytes
    clstm_path = os.path.join(resources, 'toy.clstm').encode('utf-8')
    loaded = models.load_any(clstm_path)
    self.assertIsInstance(loaded, models.TorchSeqRecognizer)
def setUp(self):
    """
    Opens the two image fixtures and loads the `overfit.mlmodel` fixture.
    """
    for attr, fixture in (('im', 'bw.png'), ('overfit_line', '000236.png')):
        setattr(self, attr, Image.open(resources / fixture))
    self.model = load_any(resources / 'overfit.mlmodel')