def compute_checkbox_position(blank_im):
    """Locate enclosed holes (candidate checkboxes) in a blank form image.

    The inverted image is binarized, its small connected components are
    isolated, thickened and closed, and the regions they enclose are
    extracted with a hole-filling step.

    Args:
        blank_im: 2-D image array; it is inverted as ``255 - blank_im``
            before binarization (presumably 8-bit grayscale -- confirm
            against ``convert_to_binary``).

    Returns:
        A ``(boxes, hole_im)`` tuple.  ``boxes`` is a list of
        ``[start0, start1, stop0, stop1]`` bounds (axis 0 first) for every
        hole whose area exceeds 40% of the largest hole's area, ordered by
        ``sl.center``.  ``hole_im`` is the hole mask passed through
        ``convert_binary_to_normal_im``.
    """
    ink = convert_to_binary(255 - blank_im)
    height, width = ink.shape
    min_pixels = 40

    # Find small dashes: sum the pixels of each connected component, then
    # map every pixel to the sum of the component it belongs to.
    cc_labels, cc_count = morph.label(ink)
    cc_sums = measurements.sum(ink, cc_labels, range(cc_count + 1))
    pixel_sums = cc_sums[cc_labels]
    is_small = (pixel_sums > 0) * (pixel_sums < min_pixels)
    kept = minimum(ink, 1 - is_small)
    # Whatever was removed from the binary image is the small-component mask.
    small_cc = np.bitwise_xor(kept, ink)

    # Temporary fix: add a bottom line so the last row is fully set.
    small_cc[height - 1:, :] = 1
    small_cc = morph.r_dilation(small_cc, (7, 7))
    small_cc = morph.r_closing(small_cc, (9, 9))

    # Find holes using morphology: filled mask minus the mask itself.
    filled = morph.fill_hole(small_cc)
    hole = filled - small_cc

    # Locate the holes and keep only those comparable to the largest one.
    hole_labels, _ = morph.label(hole)
    regions = sorted(morph.find_objects(hole_labels), key=sl.center)
    areas = [sl.area(region) for region in regions]
    largest = amax(areas) if regions else 0
    area_thres = 0.4 * largest
    boxes = [
        [region[0].start, region[1].start, region[0].stop, region[1].stop]
        for region, area in zip(regions, areas)
        if area > area_thres
    ]
    return boxes, convert_binary_to_normal_im(hole)
def match_image_with_blank(im, blank_bin):
    """Align a blank-form mask with an image and erase the overlapping ink.

    The inverted image is binarized, then ``blank_bin`` is shifted over a
    window of +/-6% of the image size along each axis.  The shift with the
    largest pixel overlap is selected, provided the overlap exceeds 20% of
    the pixel sum of ``blank_bin``.  The shifted mask, dilated by 5x5, is
    then removed from the binarized image.

    Args:
        im: 2-D image array; inverted as ``255 - im`` before binarization
            (presumably 8-bit grayscale -- confirm against
            ``convert_to_binary``).
        blank_bin: binary mask of the blank form; assumed to have the same
            shape as ``im`` and an integer/boolean dtype, since it is
            combined with the binarized image using ``&``.

    Returns:
        ``(im, None)`` unchanged when ``blank_bin`` sums to less than 10.
        Otherwise ``(result_im, (shift_y, shift_x))`` where ``result_im`` is
        the binarized image passed through ``convert_binary_to_normal_im``.
        If no shift passes the overlap threshold, the binarized image is
        returned unmodified with a shift of ``(0, 0)``.
    """
    binary = convert_to_binary(255 - im, thres=0.6)
    h, w = binary.shape
    max_shift_ratio = 0.06
    range_x = int(max_shift_ratio * w)
    range_y = int(max_shift_ratio * h)

    sum_blank = np.sum(blank_bin)
    if sum_blank < 10:
        # The mask holds too few set pixels to be matched against.
        return im, None

    thres = 0.2
    max_match = -1
    shift_x, shift_y = 0, 0
    # The upper bounds are inclusive so the search window is symmetric and
    # the zero shift is still evaluated when a range truncates to 0 on a
    # small image; an exclusive bound would skip +range and, for range 0,
    # test no shift at all.
    for x in range(-range_x, range_x + 1):
        for y in range(-range_y, range_y + 1):
            temp_blank = shift_binary(blank_bin, (y, x))
            match = np.sum(temp_blank & binary)
            if 1.0 * match / sum_blank > thres and match > max_match:
                max_match = match
                shift_x, shift_y = x, y

    if max_match != -1:
        temp_blank = shift_binary(blank_bin, (shift_y, shift_x))
        # Thicken the mask so slightly misaligned strokes are erased too.
        temp_blank = morph.r_dilation(temp_blank, (5, 5))
        binary = binary & (1 - temp_blank)
    return convert_binary_to_normal_im(binary), (shift_y, shift_x)
def compute_separators_morph(binary,scale,sepwiden=10,maxseps=2):
    """Find vertical black lines that act as column separators.

    Args:
        binary: binary page image (2-D; the structuring sizes below are
            given as ``(axis0, axis1)`` pairs).
        scale: size estimate used to derive the structuring-element sizes.
        sepwiden: extra extent along axis 1 added to the dilation and used
            for the erosion that follows the opening.
        maxseps: maximum number of separators to keep.

    Returns:
        The mask left after the dilation, opening, erosion and the two
        region-selection passes.
    """
    dilate0 = int(max(5, scale / 4))
    dilate1 = int(max(5, scale)) + sepwiden

    # Thicken the ink, then keep only structures that survive an opening
    # with an element spanning 10*scale along axis 0.
    thickened = morph.r_dilation(binary, (dilate0, dilate1))
    candidates = morph.rb_opening(thickened, (10 * scale, 1))
    candidates = morph.r_erosion(candidates, (dilate0 // 2, sepwiden))

    # Two selection passes: first on sl.dim1 with twice the candidate
    # budget, then on sl.dim0 with the final budget.
    candidates = morph.select_regions(
        candidates, sl.dim1, min=3, nbest=2 * maxseps)
    separators = morph.select_regions(
        candidates, sl.dim0, min=20 * scale, nbest=maxseps)
    return separators
def compute_separators_morph(binary, scale, sepwiden=10, maxseps=2):
    """Find vertical black lines that act as column separators.

    ``sepwiden`` and ``maxseps`` default to 10 and 2, so callers may pass
    only ``binary`` and ``scale``; explicit values still work as before.

    Args:
        binary: binary page image (2-D; the structuring sizes below are
            given as ``(axis0, axis1)`` pairs).
        scale: size estimate used to derive the structuring-element sizes.
        sepwiden: extra extent along axis 1 added to the dilation and used
            for the erosion that follows the opening.
        maxseps: maximum number of separators to keep.

    Returns:
        The mask left after the dilation, opening, erosion and the two
        region-selection passes.
    """
    d0 = int(max(5, scale / 4))
    d1 = int(max(5, scale)) + sepwiden
    # Thicken the ink, then keep only structures that survive an opening
    # with an element spanning 10*scale along axis 0.
    thick = morph.r_dilation(binary, (d0, d1))
    vert = morph.rb_opening(thick, (10 * scale, 1))
    vert = morph.r_erosion(vert, (d0 // 2, sepwiden))
    # First pass keeps twice the budget (filtered on sl.dim1); the second
    # pass filters on sl.dim0 and keeps the final maxseps regions.
    vert = morph.select_regions(vert, sl.dim1, min=3, nbest=2 * maxseps)
    vert = morph.select_regions(vert, sl.dim0, min=20 * scale, nbest=maxseps)
    return vert
def compute_colseps_mconv(binary, scale=1.0):
    """Find column separators by combining morphology and convolution.

    Args:
        binary: binary page image (2-D).
        scale: size estimate used to derive the filter and structuring
            sizes.

    Returns:
        A mask combining the selected low-density regions inside text
        blocks with everything outside the dilated blocks.
    """
    # Smooth the ink density, then average it along axis 0.
    density = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
    density = uniform_filter(density, (5.0 * scale, 1))
    # Pixels whose smoothed density is below 10% of the maximum.
    sparse = density < amax(density) * 0.1

    block_size = int(4 * scale)
    text_blocks = morph.r_closing(binary, (block_size, block_size))

    # Sparse areas inside text blocks; keep up to 3 regions selected on
    # sl.dim0 with a minimum of 10*scale.
    candidates = minimum(text_blocks, sparse)
    candidates = morph.select_regions(
        candidates, sl.dim0, min=10 * scale, nbest=3)

    # Add everything that lies outside the (slightly grown) text blocks.
    grown_blocks = morph.r_dilation(text_blocks, (5, 5))
    return maximum(candidates, 1 - grown_blocks)
def compute_separators_morph_horizontal(binary, scale, widen=True):
    """Find horizontal black lines using morphological operations.

    Args:
        binary: binary page image (2-D; sizes below are ``(axis0, axis1)``).
        scale: size estimate; the opening element spans ``int(4 * scale)``
            along axis 1.
        widen: when true, the initial dilation is one pixel larger along
            axis 0.

    Returns:
        The mask left after dilation, opening and erosion.
    """
    span = 4
    extent1 = span
    extent0 = span + 1 if widen else span

    thickened = morph.r_dilation(binary, (extent0, extent1))
    # Only structures at least int(4*scale) long along axis 1 survive.
    lines = morph.r_opening(thickened, (1, int(4 * scale)))
    return morph.r_erosion(lines, (span, extent1 // 2))
def compute_separators_morph_vertical(binary, scale, widen=True):
    """Find vertical black lines using morphological operations.

    Args:
        binary: binary page image (2-D; sizes below are ``(axis0, axis1)``).
        scale: size estimate; the opening element spans ``int(2 * scale)``
            along axis 0.
        widen: when true, the initial dilation is one pixel larger along
            axis 1.

    Returns:
        The mask left after dilation, opening and erosion.
    """
    span = 3
    extent0 = span
    extent1 = span + 1 if widen else span

    thickened = morph.r_dilation(binary, (extent0, extent1))
    # Only structures at least int(2*scale) long along axis 0 survive.
    lines = morph.r_opening(thickened, (int(2 * scale), 1))
    return morph.r_erosion(lines, (extent0 // 2, span))
def compute_colseps_morph(binary, scale, maxseps=3, minheight=20, maxwidth=5):
    """Find extended vertical whitespace that separates columns.

    Uses morphological operations on the box map of the page.

    Args:
        binary: binary page image (2-D).
        scale: size estimate used to derive the structuring sizes.
        maxseps: accepted for interface compatibility but not used; the
            limit is read from ``args.maxcolseps``.
        minheight: accepted for interface compatibility but not used; the
            minimum is read from ``args.csminheight``.
        maxwidth: accepted for interface compatibility but not used.

    Returns:
        The column-separator mask after region selection and the final
        erosion/dilation pair.
    """
    boxmap = psegutils.compute_boxmap(binary, scale, (0.4, 5), dtype='B')
    # Whitespace is whatever a tall closing of the box map does not cover.
    cols = 1 - morph.rb_closing(boxmap, (int(20 * scale), int(scale)))
    cols = morph.select_regions(cols, sl.aspect, min=args.csminaspect)
    cols = morph.select_regions(
        cols, sl.dim0, min=args.csminheight * scale, nbest=args.maxcolseps)
    # Erode and re-dilate along axis 0; the dilation uses a shifted origin.
    extent = int(0.5 + scale)
    cols = morph.r_erosion(cols, (extent, 0))
    cols = morph.r_dilation(cols, (extent, 0), origin=(int(scale / 2) - 1, 0))
    return cols
def compute_colseps_mconv(self, binary, scale=1.0):
    """Find column separators by combining morphology and convolution.

    Args:
        binary: binary page image (2-D).
        scale: size estimate used to derive the filter and structuring
            sizes.

    Returns:
        A mask combining the selected low-density regions inside text
        blocks with everything outside the dilated blocks.  The selection
        limits come from ``self.parameter['csminheight']`` and
        ``self.parameter['maxcolseps']``.
    """
    # Smooth the ink density, then average it along axis 0.
    density = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
    density = uniform_filter(density, (5.0 * scale, 1))
    # Pixels whose smoothed density is below 10% of the maximum.
    sparse = density < np.amax(density) * 0.1

    block_size = int(4 * scale)
    text_blocks = morph.rb_closing(binary, (block_size, block_size))

    # Sparse areas inside text blocks, filtered on sl.dim0.
    candidates = np.minimum(text_blocks, sparse)
    candidates = morph.select_regions(
        candidates,
        sl.dim0,
        min=self.parameter['csminheight'] * scale,
        nbest=self.parameter['maxcolseps'],
    )

    # Add everything that lies outside the (slightly grown) text blocks.
    grown_blocks = morph.r_dilation(text_blocks, (5, 5))
    return np.maximum(candidates, 1 - grown_blocks)
def compute_colseps_morph(self, binary, scale):
    """Find extended vertical whitespace that separates columns.

    Uses morphological operations on the box map of the page.

    Args:
        binary: binary page image (2-D).
        scale: size estimate used to derive the structuring sizes.

    Returns:
        The column-separator mask after region selection and the final
        erosion/dilation pair.  Selection limits come from
        ``self.parameter['csminaspect']``, ``self.parameter['csminheight']``
        and ``self.parameter['maxcolseps']``.
    """
    boxmap = psegutils.compute_boxmap(binary, scale, dtype='B')
    # Whitespace is whatever a tall closing of the box map does not cover.
    cols = 1 - morph.rb_closing(boxmap, (int(20 * scale), int(scale)))
    cols = morph.select_regions(
        cols, sl.aspect, min=self.parameter['csminaspect'])
    cols = morph.select_regions(
        cols,
        sl.dim0,
        min=self.parameter['csminheight'] * scale,
        nbest=self.parameter['maxcolseps'],
    )
    # Erode and re-dilate along axis 0; the dilation uses a shifted origin.
    extent = int(0.5 + scale)
    cols = morph.r_erosion(cols, (extent, 0))
    cols = morph.r_dilation(cols, (extent, 0), origin=(int(scale / 2) - 1, 0))
    return cols
def compute_colseps_mconv(binary, scale=1.0):
    """Find column separators by combining morphology and convolution.

    Each intermediate mask is handed to ``DSAVE`` under a numbered title.

    Args:
        binary: binary page image (2-D).
        scale: size estimate used to derive the filter and structuring
            sizes.

    Returns:
        A mask combining the selected low-density regions inside text
        blocks with everything outside the dilated blocks.  The selection
        limits come from ``args['csminheight']`` and ``args['maxcolseps']``.
    """
    # Values are unused; the unpacking raises unless binary is 2-D.
    _height, _width = binary.shape

    # Smooth the ink density, then average it along axis 0.
    density = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
    density = uniform_filter(density, (5.0 * scale, 1))
    # Pixels whose smoothed density is below 10% of the maximum.
    sparse = density < amax(density) * 0.1
    DSAVE("1thresh", sparse)

    block_size = int(4 * scale)
    text_blocks = morph.rb_closing(binary, (block_size, block_size))
    DSAVE("2blocks", text_blocks)

    # Sparse areas inside text blocks, filtered on sl.dim0.
    candidates = minimum(text_blocks, sparse)
    candidates = morph.select_regions(
        candidates,
        sl.dim0,
        min=args['csminheight'] * scale,
        nbest=args['maxcolseps'],
    )
    DSAVE("3seps", candidates)

    # Add everything that lies outside the (slightly grown) text blocks.
    text_blocks = morph.r_dilation(text_blocks, (5, 5))
    DSAVE("4blocks", text_blocks)
    combined = maximum(candidates, 1 - text_blocks)
    DSAVE("5combo", combined)
    return combined