def load_model(self):
    """Build the tokenizer and the HTR model, then load the checkpoint.

    Reads ``charset_base``, ``max_text_length``, ``architecture``,
    ``input_size`` and ``checkpoint_path`` from the instance and stores the
    results on ``self.tokenizer`` and ``self.model``.
    """
    # The tokenizer is created first because the model's vocabulary size
    # is taken from it.
    self.tokenizer = Tokenizer(
        chars=self.charset_base,
        max_text_length=self.max_text_length,
    )

    self.model = HTRModel(
        architecture=self.architecture,
        input_size=self.input_size,
        vocab_size=self.tokenizer.vocab_size,
        top_paths=10,
    )

    # Compile before loading the weights, as the original code does.
    network = self.model
    network.compile()
    network.load_checkpoint(target=self.checkpoint_path)
class TextRecognizer:
    """Recognize text in images using an HTR model restored from a checkpoint.

    Constructing an instance builds the tokenizer and the model and loads the
    checkpoint weights, so the object is ready for prediction right away.
    """

    def __init__(
        self,
        checkpoint_path="./py/checkpoint_recognizer.hdf5",
        input_size=(1024, 128, 1),
        max_text_length=128,
        charset_base=string.printable[:95],
        architecture="flor",
    ):
        """Store the configuration and load the model.

        Args:
            checkpoint_path: Target passed to ``HTRModel.load_checkpoint``.
            input_size: Input size handed to both the preprocessing step and
                the model constructor.
            max_text_length: Maximum text length given to the tokenizer.
            charset_base: Characters the tokenizer is built from.
            architecture: Architecture name forwarded to ``HTRModel``.
        """
        self.tokenizer = None
        self.model = None
        self.checkpoint_path = checkpoint_path
        self.input_size = input_size
        self.max_text_length = max_text_length
        self.charset_base = charset_base
        self.architecture = architecture

        # Called before the model is created; presumably it must run before
        # the framework allocates GPU memory -- confirm in ml_utils.
        ml_utils.limit_gpu_memory()
        self.load_model()

    def load_model(self):
        """Create the tokenizer and the model, then load checkpoint weights."""
        self.tokenizer = Tokenizer(
            chars=self.charset_base,
            max_text_length=self.max_text_length,
        )
        self.model = HTRModel(
            architecture=self.architecture,
            input_size=self.input_size,
            vocab_size=self.tokenizer.vocab_size,
            top_paths=10,
        )
        self.model.compile()
        self.model.load_checkpoint(target=self.checkpoint_path)

    def read_all_text_from_images(self, images):
        """Return a list with the recognized text of every image, in order."""
        return [self.read_text_from_image(img) for img in images]

    def read_text_from_image(self, img):
        """Return the first decoded candidate for ``img``.

        Returns:
            The decoded text of the first candidate of the first prediction
            (presumably the most probable one -- confirm against
            ``HTRModel.predict``), or ``""`` when the model produced no
            prediction or no candidate.
        """
        prepared = pp.preprocess(img, input_size=self.input_size)
        x_test = pp.normalization([prepared])

        # The probabilities returned alongside the predictions are not used.
        predicts, _ = self.model.predict(x_test, ctc_decode=True)

        # A single image is submitted, so only the first prediction matters.
        # ``len`` is used instead of truthiness so array-like results work.
        if len(predicts) == 0 or len(predicts[0]) == 0:
            return ""

        # Decode only the candidate that is actually returned.
        return self.tokenizer.decode(predicts[0][0])
print(f"Image shape:\t{dt[x].shape}") print(f"Ground truth:\t{gt[x].decode()}") print(f"Predict:\t{predicts[x]}\n") cv2.imshow("img", pp.adjust_to_see(dt[x])) cv2.waitKey(0) elif args.image: tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length) img = pp.preproc(args.image, input_size=input_size) x_test = pp.normalization([img]) model = HTRModel(architecture=args.arch, input_size=input_size, vocab_size=tokenizer.vocab_size, top_paths=10) model.compile() model.load_checkpoint(target=target_path) predicts, probabilities = model.predict(x_test, ctc_decode=True) predicts = [[tokenizer.decode(x) for x in y] for y in predicts] print("\n####################################") for i, (pred, prob) in enumerate(zip(predicts, probabilities)): print("\nProb. - Predict") for (pd, pb) in zip(pred, prob): print(f"{pb:.4f} - {pd}")
# Build the data generator and report how many images each partition holds.
dtgen = DataGenerator(
    source=source_path,
    batch_size=batch_size,
    charset=charset_base,
    max_text_length=max_text_length,
)

print(f"Train images: {dtgen.size['train']}")
print(f"Validation images: {dtgen.size['valid']}")
print(f"Test images: {dtgen.size['test']}")

from network.model import HTRModel

# Create and compile the HTRModel; the vocabulary size comes from the
# generator's tokenizer.
model = HTRModel(
    architecture=arch,
    input_size=input_size,
    vocab_size=dtgen.tokenizer.vocab_size,
    beam_width=10,
    stop_tolerance=20,
    reduce_tolerance=15,
)
model.compile(learning_rate=0.001)
model.summary(output_path, "summary.txt")

# Load the checkpoint weights file (HDF5) if it exists, then get the
# default callbacks.
model.load_checkpoint(target=target_path)
callbacks = model.get_callbacks(
    logdir=output_path,
    checkpoint=target_path,
    verbose=1,
)

# TODO PREDICT
from data import preproc as pp
elif args.train or args.test: os.makedirs(output_path, exist_ok=True) dtgen = DataGenerator(hdf5_src=hdf5_src, batch_size=args.batch_size, charset=charset_base, max_text_length=max_text_length) network_func = getattr(architecture, args.arch) ioo = network_func(input_size=input_size, output_size=(dtgen.tokenizer.vocab_size + 1), learning_rate=0.001) model = HTRModel(inputs=ioo[0], outputs=ioo[1]) model.compile(optimizer=ioo[2]) checkpoint = "checkpoint_weights.hdf5" model.load_checkpoint(target=os.path.join(output_path, checkpoint)) if args.train: model.summary(output_path, "summary.txt") callbacks = model.get_callbacks(logdir=output_path, hdf5=checkpoint, verbose=1) start_time = time.time() h = model.fit_generator(generator=dtgen.next_train_batch(), epochs=args.epochs, steps_per_epoch=dtgen.train_steps,
# Fixed recognition settings used by the generator and the model below.
input_size = (1024, 128, 1)
max_text_length = 128
charset_base = string.printable[:95]

# At least one of the dataset file or the checkpoint file must exist.
assert os.path.isfile(source_path) or os.path.isfile(target_path)
os.makedirs(output_path, exist_ok=True)

dtgen = DataGenerator(
    source=source_path,
    batch_size=args.batch_size,
    charset=charset_base,
    max_text_length=max_text_length,
    predict=args.test,
)

model = HTRModel(
    architecture=args.arch,
    input_size=input_size,
    vocab_size=dtgen.tokenizer.vocab_size,
)

# set `learning_rate` parameter or get architecture default value
model.compile(learning_rate=0.001)
model.load_checkpoint(target=target_path)

start_time = datetime.datetime.now()

# Run prediction over the test batches; the second returned value is unused.
predicts, _ = model.predict(
    x=dtgen.next_test_batch(),
    steps=dtgen.steps['test'],
    ctc_decode=True,
    verbose=1,
)

# Keep only the first decoded candidate of every prediction.
predicts = [dtgen.tokenizer.decode(candidates[0]) for candidates in predicts]
source_path = os.path.join(args.path, args.source, 'words_screenshot_labeled') output_path = os.path.join(args.path, args.source, 'output') target_path = os.path.join(args.path, args.source, 'checkpoint') input_size = (1024, 128, 1) charset_base = string.printable[:95] ds = DataLoader.DataLoader(filePath=source_path, batchSize=args.batch_size, imgSize=args.image_size, maxTextLen=args.max_text_len) model = HTRModel(architecture=args.arch, input_size=input_size, vocab_size=len(ds.charList)) model.compile(learning_rate=0.001) model.load_checkpoint(target=target_path) if args.train: model.summary(output_path, "summary.txt") callbacks = model.get_callbacks(logdir=output_path, checkpoint=target_path, verbose=1) start_time = datetime.datetime.now() h = model.fit(x=ds.getNext().imgs, epochs=args.epochs,
# Build the data generator and report how many images each partition holds.
dtgen = DataGenerator(
    source=source_path,
    batch_size=batch_size,
    charset=charset_base,
    max_text_length=max_text_length,
)

print(f"Train images: {dtgen.size['train']}")
print(f"Validation images: {dtgen.size['valid']}")
print(f"Test images: {dtgen.size['test']}")

#TODO HTRModel Class
from network.model import HTRModel

# Create and compile the HTRModel; the vocabulary size comes from the
# generator's tokenizer.
model = HTRModel(
    architecture=arch,
    input_size=input_size,
    vocab_size=dtgen.tokenizer.vocab_size,
    beam_width=10,
    stop_tolerance=20,
    reduce_tolerance=15,
)
model.compile(learning_rate=0.001)
model.summary(output_path, "summary.txt")

# Load the checkpoint weights file (HDF5) if it exists, then get the
# default callbacks.
model.load_checkpoint(target=target_path)
callbacks = model.get_callbacks(
    logdir=output_path,
    checkpoint=target_path,
    verbose=1,
)

#TODO Training
# to calculate total and average time per epoch