def binary_objects(binary: np.ndarray) -> list:
    """
    Labels features in an array and segments them into objects.

    Args:
        binary: Array handed to `morph.label` for labelling.

    Returns:
        The result of `morph.find_objects` on the label image. Presumably one
        index object (tuple of slices) per label, in label order, as with
        `scipy.ndimage.find_objects` -- TODO confirm against `morph`.
    """
    # Only the label image is needed; the second value returned by
    # `morph.label` is discarded.
    labels, _ = morph.label(binary)
    return morph.find_objects(labels)
def compute_line_seeds(binary, bottom, top, colseps, scale, threshold=0.2):
    """
    Based on gradient maps, computes candidates for baselines and xheights.
    Then, it marks the regions between the two as a line seed.

    Args:
        binary: Array whose shape defines the shape of the seed map.
        bottom: Gradient map from which the baseline candidates are taken.
        top: Gradient map from which the xheight candidates are taken.
        colseps: Column separator map. Candidates and seeds are multiplied by
                 `1 - colseps`, so pixels where it is 1 are suppressed.
        scale: Scale estimate. It sets the size of the first maximum filter,
               the height of the strip marked above each baseline candidate,
               the largest accepted baseline/xheight distance (`5 * scale`)
               and the horizontal dilation of the seeds.
        threshold: Relative cut-off for candidates. As written it enters the
                   comparison twice (`threshold * max * threshold`); for
                   `top` the product is additionally halved.

    Returns:
        The label image produced by `morph.label` on the seed map.
    """
    vrange = int(scale)

    # Baseline candidates: positions where `bottom` equals its maximum-filtered
    # version, dilated by a 2x2 maximum filter, thresholded and masked.
    bmarked = maximum_filter(bottom == maximum_filter(bottom, (vrange, 0)), (2, 2))
    bmarked = bmarked * (bottom > threshold * np.amax(bottom) * threshold) * (1 - colseps)

    # Xheight candidates: same procedure on `top` with half the cut-off,
    # followed by a (1, 20) maximum filter.
    tmarked = maximum_filter(top == maximum_filter(top, (vrange, 0)), (2, 2))
    tmarked = tmarked * (top > threshold * np.amax(top) * threshold / 2) * (1 - colseps)
    tmarked = maximum_filter(tmarked, (1, 20))

    seeds = np.zeros(binary.shape, 'i')
    delta = max(3, int(scale / 2))
    for x in range(bmarked.shape[1]):
        # (row, 1) marks a baseline candidate, (row, 0) an xheight candidate;
        # the column is walked in descending row order.
        transitions = sorted(
            [(y, 1) for y in find(bmarked[:, x])] +
            [(y, 0) for y in find(tmarked[:, x])],
            reverse=True)
        # Sentinel so that the last real transition also has a successor.
        transitions.append((0, 0))
        for (y0, s0), (y1, s1) in zip(transitions, transitions[1:]):
            if s0 == 0:
                continue
            # Clamp the start: a negative start would be interpreted relative
            # to the end of the axis, so the strip would be dropped (or land
            # in the wrong rows) for candidates closer than `delta` to row 0.
            seeds[max(y0 - delta, 0):y0, x] = 1
            if s1 == 0 and (y0 - y1) < 5 * scale:
                seeds[y1:y0, x] = 1

    seeds = maximum_filter(seeds, (1, int(1 + scale)))
    seeds = seeds * (1 - colseps)
    seeds, _ = morph.label(seeds)
    return seeds
def compute_line_seeds(binary, bottom, top, colseps, scale, threshold=0.2):
    """
    Based on gradient maps, computes candidates for baselines and xheights.
    Then, it marks the regions between the two as a line seed.

    Args:
        binary: Only its shape is used; the seed map has the same shape.
        bottom: Gradient map yielding the baseline candidates.
        top: Gradient map yielding the xheight candidates.
        colseps: Column separator map; everything is multiplied by
                 `1 - colseps`, which blanks pixels where it equals 1.
        scale: Scale estimate driving the filter sizes, the strip height
               above a baseline (`max(3, int(scale / 2))`) and the maximum
               baseline-to-xheight distance (`5 * scale`).
        threshold: Relative candidate threshold. The expression multiplies by
                   it twice, and the `top` variant divides the result by 2.

    Returns:
        Seed map labelled by `morph.label`.
    """
    vrange = int(scale)

    # candidates taken from the `bottom` map
    bottom_peaks = bottom == maximum_filter(bottom, (vrange, 0))
    bmarked = maximum_filter(bottom_peaks, (2, 2))
    bmarked = bmarked * (bottom > threshold * np.amax(bottom) * threshold) * (
        1 - colseps)

    # candidates taken from the `top` map, widened horizontally afterwards
    top_peaks = top == maximum_filter(top, (vrange, 0))
    tmarked = maximum_filter(top_peaks, (2, 2))
    tmarked = tmarked * (top > threshold * np.amax(top) * threshold / 2) * (
        1 - colseps)
    tmarked = maximum_filter(tmarked, (1, 20))

    seeds = np.zeros(binary.shape, 'i')
    delta = max(3, int(scale / 2))
    max_gap = 5 * scale

    for col in range(bmarked.shape[1]):
        # tag 1: candidate from `bmarked`, tag 0: candidate from `tmarked`
        marks = [(row, 1) for row in find(bmarked[:, col])]
        marks.extend((row, 0) for row in find(tmarked[:, col]))
        marks.sort(reverse=True)
        # trailing sentinel gives the last mark a successor at row 0
        marks.append((0, 0))

        for pos, (row, tag) in enumerate(marks[:-1]):
            if tag == 0:
                continue
            # The lower bound is clamped at 0. Without it a candidate nearer
            # than `delta` rows to the top would produce a negative start,
            # which NumPy counts from the end of the axis.
            strip_start = max(row - delta, 0)
            seeds[strip_start:row, col] = 1
            next_row, next_tag = marks[pos + 1]
            if next_tag == 0 and (row - next_row) < max_gap:
                seeds[next_row:row, col] = 1

    seeds = maximum_filter(seeds, (1, int(1 + scale)))
    seeds = seeds * (1 - colseps)
    seeds, _ = morph.label(seeds)
    return seeds
def remove_hlines(binary, scale, maxsize=10):
    """
    Removes horizontal black lines that only interfere with page
    segmentation.

    Every labelled object whose width exceeds `maxsize * scale` is erased.

    Args:
        binary (numpy.array): image to filter
        scale (float): factor applied to `maxsize` to obtain the width limit
        maxsize (int): width limit in multiples of `scale`; objects wider
                       than that are removed

    Returns:
        numpy.array of type 'B' that is nonzero wherever a label survived.
    """
    labels, _ = morph.label(binary)
    width_limit = maxsize * scale
    # Label values start at 1 while the object list is zero-based.
    for label_id, bounds in enumerate(morph.find_objects(labels), start=1):
        if sl.width(bounds) > width_limit:
            # Only clear this object's own pixels inside its bounds; other
            # labels overlapping the same window stay untouched.
            window = labels[bounds]
            window[window == label_id] = 0
    return np.array(labels != 0, 'B')
def remove_hlines(binary, scale, maxsize=10):
    """
    Removes horizontal black lines that only interfere with page
    segmentation.

    Args:
        binary (numpy.array): image that is labelled and filtered
        scale (float): multiplied with `maxsize` to form the width limit
        maxsize (int): objects wider than `maxsize * scale` are dropped

    Returns:
        numpy.array containing the filtered image.
    """
    labels, _ = morph.label(binary)
    objects = morph.find_objects(labels)

    # Collect (label value, bounds) of every object that is too wide; the
    # label value of the object at list position n is n + 1.
    too_wide = [(n + 1, box)
                for n, box in enumerate(objects)
                if sl.width(box) > maxsize * scale]

    for value, box in too_wide:
        hits = labels[box] == value
        labels[box][hits] = 0

    return np.array(labels != 0, 'B')
def remove_hlines(binary: np.ndarray,
                  scale: float,
                  maxsize: int = 10) -> np.ndarray:
    """
    Removes horizontal black lines that only interfere with page
    segmentation.

    Args:
        binary: Image that is labelled with `morph.label`.
        scale: Factor that converts `maxsize` into the width limit.
        maxsize: Width limit in multiples of `scale`. Labelled objects wider
                 than `maxsize * scale` are removed.

    Returns:
        numpy.ndarray of type 'B', nonzero where a labelled object remains.
    """
    logger.debug('Filtering horizontal lines')
    labels, _ = morph.label(binary)
    threshold = maxsize * scale

    for index, region in enumerate(morph.find_objects(labels)):
        if not (sl.width(region) > threshold):
            continue
        # Objects are listed in label order, so this one carries index + 1.
        view = labels[region]
        view[view == index + 1] = 0

    return np.array(labels != 0, dtype='B')
def binary_objects(binary):
    """
    Labels `binary` with `morph.label` and returns what `morph.find_objects`
    yields for the resulting label image.
    """
    # The second value returned by `morph.label` is not needed here.
    label_image, _ = morph.label(binary)
    return morph.find_objects(label_image)
def segment(im,
            text_direction: str = 'horizontal-lr',
            scale: Optional[float] = None,
            maxcolseps: float = 2,
            black_colseps: bool = False,
            no_hlines: bool = True,
            pad: Union[int, Tuple[int, int]] = 0,
            mask: Optional[np.ndarray] = None,
            reading_order_fn: Callable = reading_order) -> Dict[str, Any]:
    """
    Segments a page into text lines.

    Segments a page into text lines and returns the absolute coordinates of
    each line in reading order.

    Args:
        im: A bi-level page of mode '1' or 'L'
        text_direction: Principal direction of the text
                        (horizontal-lr/rl/vertical-lr/rl)
        scale: Scale of the image. Will be auto-determined if set to `None`
               (or any other falsy value).
        maxcolseps: Maximum number of whitespace column separators
        black_colseps: Whether column separators are assumed to be vertical
                       black lines or not
        no_hlines: Switch for small horizontal line removal.
        pad: Padding to add to line bounding boxes. If int the same padding is
             used both left and right. If a 2-tuple, uses (padding_left,
             padding_right).
        mask: A bi-level mask image of the same size as `im` (as passed in,
              i.e. before any rotation for vertical text) where 0-valued
              regions are ignored for segmentation purposes. Disables column
              detection. NOTE(review): the code uses `.mode`, `.size`,
              `.convert()` and `.rotate()` on it, i.e. it expects an image
              object rather than the annotated array type -- confirm and
              adjust the annotation.
        reading_order_fn: Function to call to order line output. Callable
                          accepting a list of slices (y, x) and a text
                          direction in (`rl`, `lr`).

    Returns:
        A dictionary containing the text direction and a list of reading order
        sorted bounding boxes under the key 'boxes':

        .. code-block::

            {'text_direction': '$dir',
             'boxes': [(x1, y1, x2, y2),...],
             'script_detection': False}

        Pages that are rejected (too many connected components, failure in
        the column finder) yield the same structure with an empty box list.

    Raises:
        KrakenInputException: if the input image is not binarized, the text
                              direction is invalid, or the mask is not
                              bitonal or differs in size from the image.
    """
    im_str = get_im_str(im)
    logger.info(f'Segmenting {im_str}')

    if im.mode != '1' and not is_bitonal(im):
        logger.error(f'Image {im_str} is not bi-level')
        raise KrakenInputException(f'Image {im_str} is not bi-level')

    # rotate input image for vertical lines
    if text_direction.startswith('horizontal'):
        angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
    else:
        logger.error(f'Invalid text direction \'{text_direction}\'')
        raise KrakenInputException(f'Invalid text direction {text_direction}')

    # Remember the size of the image as supplied by the caller: the mask is
    # validated against it, not against the rotated image.
    input_size = im.size

    logger.debug(f'Rotating input image by {angle} degrees')
    im = im.rotate(angle, expand=True)

    a = pil2array(im)
    binary = np.array(a > 0.5 * (np.amin(a) + np.amax(a)), 'i')
    binary = 1 - binary

    # Result returned whenever the page cannot be segmented; it carries the
    # same keys as the regular result.
    empty_result = {
        'text_direction': text_direction,
        'boxes': [],
        'script_detection': False
    }

    _, ccs = morph.label(1 - binary)
    if ccs > np.dot(*im.size) / (30 * 30):
        logger.warning(
            f'Too many connected components for a page image: {ccs}')
        return empty_result

    if not scale:
        scale = estimate_scale(binary)

    if no_hlines:
        binary = remove_hlines(binary, scale)

    # emptyish images will cause exceptions here.
    try:
        # Explicit None test: truth-testing the mask object itself is either
        # meaningless or, for a multi-element array, raises a ValueError that
        # the handler below would misreport as an empty image.
        if mask is not None:
            if mask.mode != '1' and not is_bitonal(mask):
                logger.error('Mask is not bitonal')
                raise KrakenInputException('Mask is not bitonal')
            mask = mask.convert('1')
            # The mask has not been rotated yet, so compare it with the
            # unrotated image size.
            if mask.size != input_size:
                size_msg = f'Mask size {mask.size} doesn\'t match image size {input_size}'
                logger.error(size_msg)
                raise KrakenInputException(size_msg)
            logger.info(
                'Masking enabled in segmenter. Disabling column detection.')
            mask = mask.rotate(angle, expand=True)
            colseps = pil2array(mask)
        elif black_colseps:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
        else:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
    except ValueError:
        logger.warning(
            f'Exception in column finder (probably empty image) for {im_str}')
        return empty_result

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    # Prefer propagated labels; fall back to spread labels on foreground
    # pixels that did not receive one.
    llabels = np.where(llabels > 0, llabels, spread * binary)
    segmentation = llabels * binary

    lines = compute_lines(segmentation, scale)
    # The last two characters of the direction string are 'lr' or 'rl'.
    order = reading_order_fn([line.bounds for line in lines],
                             text_direction[-2:])
    lsort = topsort(order)
    lines = [lines[i].bounds for i in lsort]
    # (y-slice, x-slice) -> (x1, y1, x2, y2)
    lines = [(s2.start, s1.start, s2.stop, s1.stop) for s1, s2 in lines]

    if isinstance(pad, int):
        pad = (pad, pad)
    # Horizontal padding only, clipped to the (rotated) image width.
    lines = [(max(x[0] - pad[0], 0), x[1],
              min(x[2] + pad[1], im.size[0]), x[3]) for x in lines]

    return {
        'text_direction': text_direction,
        'boxes': rotate_lines(lines, 360 - angle, offset).tolist(),
        'script_detection': False
    }