def find_cuts(img, window=19):
    """Compute the cut positions for a word image.

    The image is first passed through ``cut_letters.removeWhitelines``.
    If the result is no wider than ``window`` columns, the whole image is
    treated as a single segment; otherwise the positions are taken from
    ``cut_letters.findMaxima`` applied to ``cut_letters.makeHist``.

    Args:
        img: Word image, indexable as ``img[row][col]`` (``len(img[0])`` is
            used as its width).
        window: Window size handed to ``cut_letters.makeHist``; images whose
            width is at most this value get only the two boundary cuts.
            Defaults to 19, the value that was previously hard-coded.

    Returns:
        ``[0, width - 1]`` for narrow images, otherwise the value returned
        by ``cut_letters.findMaxima``. ``None`` when ``removeWhitelines``
        returns ``None`` for the image.
    """
    # Operate on the argument itself. The previous version read the global
    # ``word_img`` here, so the function silently ignored its parameter and
    # only worked when the caller happened to define that global.
    img = cut_letters.removeWhitelines(img)
    if img is None:
        logger.warning('Image not good for classifying')
        return None

    width = len(img[0])
    if width <= window:
        # Too narrow for a window-sized histogram: cut only at both edges.
        return [0, width - 1]

    hist = cut_letters.makeHist(img, window)
    return cut_letters.findMaxima(hist)
# Script section: writes debug images showing the computed cuts for words
# taken from the annotated page (`xml`) and the page image (`img`).

# Call remove() on every entry yielded by out_dir.walk('*')
# (presumably clears output left over from a previous run — confirm).
for f in out_dir.walk('*'):
    f.remove()
logger.info('Starting to create a split file')
for sentence in xml:
    for word in sentence:
        text = word.get('text')
        print(text)
        # Disabled filter kept for reference (skipped words containing '@'
        # or shorter than 6 characters):
        #if '@' in text or len(text) < 6:
        # Active filter: every word except the literal 'buton' is skipped.
        # NOTE(review): looks like a debugging restriction — confirm before
        # running on a full page.
        if text != 'buton':
            continue
        # Get the word image: read each attribute named in `sides` as an int
        # and crop the page image to the top/bottom/left/right rectangle.
        rect = {side: int(word.get(side)) for side in sides}
        word_img = img[rect['top']:rect['bottom'], rect['left']:rect['right']]
        word_img = cut_letters.removeWhitelines(word_img)
        # removeWhitelines signals an unusable image by returning None.
        if word_img is None:
            logger.warning('Word not good for classifying: {}'.format(text))
            continue
        # Get the cuts
        cuts = find_cuts(word_img)
        if cuts is None:
            logger.warning('No cuts found in word')
            continue
        # The file names are constant, so each word that reaches this point
        # writes to the same two paths.
        # NOTE(review): `out_dir + '...'` inserts no path separator if
        # out_dir behaves like a plain string — confirm out_dir ends with one.
        cv2.imwrite(out_dir + 'original.png', word_img)
        cuts_img = cut_letters.showCuts(word_img.copy(), cuts)
        cv2.imwrite(out_dir + 'cuts.png', cuts_img)
        # Counter initialised here; its use is not visible in this part of
        # the file.
        i = 0