def preprocess_partitions(self, input_size):
    """Preprocess images and sentences from partitions."""

    for y in self.partitions:
        arange = range(len(self.dataset[y]['gt']))

        # iterate in reverse so items can be popped without shifting the remaining indexes
        for i in reversed(arange):
            text = pp.text_standardize(self.dataset[y]['gt'][i])

            if not self.check_text(text):
                self.dataset[y]['gt'].pop(i)
                self.dataset[y]['dt'].pop(i)
                continue

            self.dataset[y]['gt'][i] = text.encode()

        results = []

        # parallel variant kept for reference:
        # with multiprocessing.Pool(2) as pool:
        #     print(f"Partition: {y}")
        #     for result in tqdm(pool.imap(partial(pp.preprocess, input_size=input_size),
        #                                  self.dataset[y]['dt']),
        #                        total=len(self.dataset[y]['dt'])):
        #         results.append(result)
        #     pool.close()
        #     pool.join()

        print(f"Partition: {y}")
        for imgpath in tqdm(self.dataset[y]['dt'], total=len(self.dataset[y]['dt'])):
            results.append(pp.preprocess(imgpath, input_size=input_size))

        self.dataset[y]['dt'] = results
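# --- Hedged sketch (not from the source): the multiprocessing.Pool variant kept
# commented out above, pulled into a standalone helper. It assumes `pp.preprocess`
# accepts a path plus an `input_size` keyword, as in the sequential loop; the
# helper name and worker count are illustrative only.
import multiprocessing
from functools import partial


def preprocess_parallel(paths, input_size, workers=2):
    """Preprocess a list of image paths with a process pool (sketch)."""
    with multiprocessing.Pool(workers) as pool:
        # imap preserves input order, so results line up with `paths`
        results = list(pool.imap(partial(pp.preprocess, input_size=input_size), paths))
    return results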
def getNext(self, train=True):
    """Iterator yielding batches of (augmented images, padded token ids)."""
    self.train = train
    j = 0 if self.train else 1  # partition index: 0 = train, 1 = validation

    while True:
        # take a full batch while it fits, otherwise the remaining tail
        if self.currIdx + self.batchSize <= len(self.img_partitions[j]):
            index = self.currIdx
            until = self.currIdx + self.batchSize
        else:
            index = self.currIdx
            until = len(self.img_partitions[j])

        imgs = [pp.preprocess(os.path.join(self.filePath, self.img_partitions[j][i].filePath), self.imgSize)
                for i in range(index, until)]
        imgs = pp.augmentation(imgs,
                               rotation_range=1.5,
                               scale_range=0.05,
                               height_shift_range=0.025,
                               width_shift_range=0.05,
                               erode_range=5,
                               dilate_range=3)
        imgs = pp.normalization(imgs)

        gtTexts = [self.img_partitions[j][i].gtText for i in range(index, until)]
        gtTexts = [self.tokenizer.encode(text) for text in gtTexts]
        gtTexts = [np.pad(ids, (0, self.tokenizer.maxlen - len(ids))) for ids in gtTexts]
        gtTexts = np.asarray(gtTexts, dtype=np.int16)

        # advance the cursor (wrapping after the last batch) so successive calls
        # yield successive batches instead of repeating the first one
        self.currIdx = until if until < len(self.img_partitions[j]) else 0

        yield (imgs, gtTexts)
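# --- Minimal self-contained sketch (not from the source) of the padding step used
# above: right-pad each encoded sentence with zeros up to a fixed length and stack
# the result into one int16 array. `maxlen` and the token ids are toy values.
import numpy as np

maxlen = 8
encoded = [np.array([5, 12, 7]), np.array([3, 9])]                 # toy token-id sequences
padded = [np.pad(ids, (0, maxlen - len(ids))) for ids in encoded]  # zero-pad on the right
batch = np.asarray(padded, dtype=np.int16)                         # shape (2, 8), ready to feed the model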
def read_text_from_image(self, img):
    """Preprocess a single image and return the best decoded transcription."""
    img = pp.preprocess(img, input_size=self.input_size)
    x_test = pp.normalization([img])

    predicts, probabilities = self.model.predict(x_test, ctc_decode=True)
    predicts = [[self.tokenizer.decode(x) for x in y] for y in predicts]

    # return the top decoding of the first (only) image, or "" if nothing was predicted
    for pred, prob in zip(predicts, probabilities):
        return pred[0]
    return ""
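# --- Toy sketch (not from the source) of the nested `predicts` structure handled
# above: one list of candidate decodings per input image, best path first, which is
# why `pred[0]` is taken as the final transcription.
toy_predicts = [["hello world", "helo world", "hello word"]]  # 1 image, top-3 candidate paths
best = toy_predicts[0][0] if toy_predicts and toy_predicts[0] else ""
print(best)  # -> hello world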
        with open(predict_file, "r") as lg:
            predicts = [line[5:] for line in lg if line.startswith("TE_P")]

        for x in range(len(dt)):
            print(f"Image shape:\t{dt[x].shape}")
            print(f"Ground truth:\t{gt[x].decode()}")
            print(f"Predict:\t{predicts[x]}\n")

            cv2.imshow("img", pp.adjust_to_see(dt[x]))
            cv2.waitKey(0)

    elif args.image:
        tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length)

        img = pp.preprocess(args.image, input_size=input_size)
        x_test = pp.normalization([img])

        model = HTRModel(architecture=args.arch,
                         input_size=input_size,
                         vocab_size=tokenizer.vocab_size,
                         top_paths=10)

        model.compile()
        model.load_checkpoint(target=target_path)

        predicts, probabilities = model.predict(x_test, ctc_decode=True)
        predicts = [[tokenizer.decode(x) for x in y] for y in predicts]

        print("\n####################################")
        for i, (pred, prob) in enumerate(zip(predicts, probabilities)):