def create_line(self):
    """Create a new text line from the points collected by the clicker.

    A line is only created when more than two points have been collected;
    the last collected point is not used for the baseline. The heights of
    the new line are the per-component median of the heights of the lines
    already present, or ``[5, 5]`` when there are none. The new line is
    wrapped in a placeholder region (id ``'dummy'``) that is appended to
    ``self.page_layout.regions`` and is also appended to ``self.lines``.

    Afterwards the current interaction is cleared and the view is rendered
    again.
    """
    points = self.clicker.points
    if len(points) > 2:
        if self.lines:
            heights = np.median(
                np.array([[line.heights[0], line.heights[1]]
                          for line in self.lines]),
                axis=0)
        else:
            heights = [5, 5]

        # ``np.float`` was a plain alias of the builtin ``float`` and has been
        # removed from NumPy (1.24); ``np.float64`` yields the same dtype.
        baseline = np.array(points[:-1], dtype=np.float64)
        new_line = layout.TextLine(id=None, baseline=baseline, heights=heights)
        new_line.polygon = linepp.baseline_to_textline(
            new_line.baseline, new_line.heights)

        # The line is stored in a placeholder region with a degenerate polygon.
        dummy_region = layout.RegionLayout(
            id='dummy',
            polygon=[[-1.0, -1.0], [-1.0, -1.0], [-1.0, -1.0]])
        dummy_region.lines.append(new_line)
        self.page_layout.regions.append(dummy_region)
        self.lines.append(new_line)

    self.clear_interaction()
    self.render()
def detect(self, image):
    """Detect text lines in ``image`` and group them into region polygons.

    The output maps are computed with ``self.get_maps``; when
    ``self.update_ds`` reports that a recomputation is required they are
    computed a second time. The lines parsed from the maps are clustered
    with ``self.cluster_lines`` and one outline is built per cluster from
    the text line polygons of its members. Finally the lines are ordered
    with ``pp.order_lines_vertical`` and the polygons are passed through
    ``self.filter_polygons``.

    Args:
        image: Input image, handed to ``self.get_maps`` unchanged.

    Returns:
        tuple: ``(polygons_list, baselines_list, heights_list,
        textlines_list)``. Four empty lists are returned when no baseline
        was found.
    """
    out_map = self.get_maps(image)
    if self.update_ds(out_map):
        out_map = self.get_maps(image)

    (baselines_list, heights_list,
     l_embd_list, m_embd_list, r_embd_list) = self.parse_maps(out_map)
    if not baselines_list:
        return [], [], [], []

    textlines_list = [
        pp.baseline_to_textline(baseline, heights)
        for baseline, heights in zip(baselines_list, heights_list)
    ]

    clusters_array = self.cluster_lines(l_embd_list, m_embd_list, r_embd_list)
    # NOTE: a post-processing pass that checks noise lines for adjacency to
    # the previous region in reading order (self.postprocess_noisy_lines) is
    # currently disabled.

    polygons_list = []
    # Only labels 0..max are visited; labels below zero never form a region.
    for cluster_id in range(np.amax(clusters_array) + 1):
        members = [
            (baseline, heights, textline)
            for baseline, heights, textline, cluster in zip(
                baselines_list, heights_list, textlines_list, clusters_array)
            if cluster == cluster_id
        ]
        if not members:
            # A label without any line has no points to build an outline
            # from (np.concatenate would raise on the empty list).
            continue
        region_baselines, region_heights, region_textlines = zip(*members)

        region_poly_points = np.concatenate(region_textlines, axis=0)

        # The alpha parameter is 1.5x the larger of the longest horizontal
        # step between consecutive baseline points and the largest height.
        max_poly_line = np.amax(np.array([
            np.amax(np.diff(baseline[:, 0])) for baseline in region_baselines
        ]))
        max_height = np.amax(np.array(region_heights))
        max_alpha = 1.5 * np.maximum(max_poly_line, max_height)

        # Assumes alpha_shape returns a Shapely geometry (it is used through
        # geom_type / simplify / exterior below) -- TODO confirm.
        region_poly = alpha_shape(region_poly_points, max_alpha)
        if region_poly.geom_type == 'MultiPolygon':
            # Parts are accessed through ``.geoms``; iterating a multi-part
            # geometry directly is not supported by Shapely 2.0.
            for poly in region_poly.geoms:
                polygons_list.append(poly.simplify(5).exterior.coords)
        elif region_poly.geom_type == 'Polygon':
            polygons_list.append(region_poly.simplify(5).exterior.coords)

    baselines_list, heights_list, textlines_list = pp.order_lines_vertical(
        baselines_list, heights_list, textlines_list)
    polygons_list = self.filter_polygons(polygons_list)
    return polygons_list, baselines_list, heights_list, textlines_list
def update_selected_lines(self, bs=0, asc=0, dsc=0, start=0, end=0):
    """Shift and resize every currently selected line, then render.

    Args:
        bs: Offset added to the second coordinate of every baseline point.
        asc: Amount added to the first height component.
        dsc: Amount added to the second height component.
        start: Offset added to the first coordinate of the first baseline
            point.
        end: Offset added to the first coordinate of the last baseline
            point.
    """
    for index in self.selected_lines:
        line = self.lines[index]

        first_height, second_height = line.heights[0], line.heights[1]
        line.heights = [first_height + float(asc), second_height + float(dsc)]

        # The baseline array is modified in place.
        line.baseline[:, 1] += float(bs)
        line.baseline[0, 0] += float(start)
        line.baseline[-1, 0] += float(end)

        # Rebuild the outline so it matches the edited baseline and heights.
        line.polygon = linepp.baseline_to_textline(line.baseline, line.heights)

    self.render()
def process_page(self, img, page_layout: PageLayout):
    """Detect lines on the page image and hand them to the page's regions.

    Pages without any region are skipped with a printed warning. Otherwise
    the lines returned by ``self.line_engine.detect_lines`` are passed to
    ``self.assign_lines_to_region`` for every region. When
    ``self.merge_lines`` is set, the lines of each region are additionally
    merged with ``linepp.merge_lines``, their polygons are rebuilt and the
    region's lines are replaced by the merged ones.

    Args:
        img: Page image, handed to the line engine.
        page_layout: Layout whose regions receive the detected lines.

    Returns:
        The ``page_layout`` object that was passed in.
    """
    if not page_layout.regions:
        print(f"Warning: Skipping line detection for page {page_layout.id}. No text region present.")
        return page_layout

    detected_baselines, detected_heights, detected_polygons = \
        self.line_engine.detect_lines(img)

    for region in page_layout.regions:
        region = self.assign_lines_to_region(
            detected_baselines, detected_heights, detected_polygons, region)
        if not self.merge_lines:
            continue

        # Merge what was assigned to this region and rebuild its lines.
        baselines = []
        heights = []
        for line in region.lines:
            baselines.append(line.baseline)
            heights.append(line.heights)
        baselines, heights = linepp.merge_lines(baselines, heights)
        polygons = list(map(linepp.baseline_to_textline, baselines, heights))

        region.lines = []
        region = self.assign_lines_to_region(
            baselines, heights, polygons, region)

    return page_layout
def postprocess_region_lines(self, region):
    """Rebuild the text lines of one region and return the region.

    A region without lines is returned untouched. Otherwise the existing
    line objects are discarded and new ones are created from their
    baselines and heights after these steps:

    1. The baselines are rotated by the angle reported by
       ``linepp.get_rotation``.
    2. If ``self.merge_lines`` is set, lines are merged.
    3. Baselines are stretched: to the region polygon when
       ``self.stretch_lines == 'max'``, otherwise by ``self.stretch_lines``
       when it is positive.
    4. If ``self.resample_lines`` is set, baselines are resampled.
    5. If ``self.heights_from_regions`` is set, heights are derived from
       the vertical extent of the region polygon and only the best scoring
       line is kept.
    6. Line polygons are rebuilt and the lines are ordered according to
       ``self.order_lines``.
    7. Baselines and polygons are rotated back and handed to
       ``assign_lines_to_region``.

    Args:
        region: Region whose ``lines`` (and ``polygon``) are processed.

    Returns:
        The value returned by ``assign_lines_to_region``, or ``region``
        itself when it has no lines.

    Raises:
        ValueError: If ``self.order_lines`` is neither ``'vertical'`` nor
            ``'reading_order'``.
    """
    if not region.lines:
        return region

    baselines = [line.baseline for line in region.lines]
    heights = [line.heights for line in region.lines]
    # The old line objects are dropped; new ones are assigned at the end.
    region.lines = []

    rotation = linepp.get_rotation(baselines)
    baselines = [
        linepp.rotate_coords(baseline, rotation, (0, 0))
        for baseline in baselines
    ]

    if self.merge_lines:
        baselines, heights = linepp.merge_lines(baselines, heights)

    if self.stretch_lines == 'max':
        # The region outline is rotated the same way as the baselines.
        rotated_region = linepp.rotate_coords(
            region.polygon.copy(), rotation, (0, 0))
        baselines = linepp.stretch_baselines_to_region(
            baselines, rotated_region)
    elif self.stretch_lines > 0:
        baselines = linepp.stretch_baselines(baselines, self.stretch_lines)

    if self.resample_lines:
        baselines = linepp.resample_baselines(baselines)

    if self.heights_from_regions:
        region_min_y = np.amin(region.polygon[:, 1])
        region_max_y = np.amax(region.polygon[:, 1])
        scores = []
        heights = []
        for baseline in baselines:
            # Heights are measured against the unrotated region polygon.
            unrotated = linepp.rotate_coords(baseline, -rotation, (0, 0))
            height_asc = int(round(np.amin(unrotated[:, 1]) - region_min_y))
            height_des = int(round(region_max_y - np.amax(unrotated[:, 1])))
            heights.append((height_asc, height_des))
            # the final line in the bounding box should be the longest and
            # in case of ambiguity, also have the biggest ascender height
            scores.append(
                np.amax(unrotated[:, 0]) - np.amin(unrotated[:, 0])
                + height_asc)
        best_ind = np.argmax(np.asarray(scores))
        baselines = [baselines[best_ind]]
        heights = [heights[best_ind]]

    textlines = [
        linepp.baseline_to_textline(baseline, height)
        for baseline, height in zip(baselines, heights)
    ]

    if self.order_lines == 'vertical':
        order_lines = linepp.order_lines_vertical
    elif self.order_lines == 'reading_order':
        order_lines = linepp.order_lines_general
    else:
        raise ValueError(
            "Argument order_lines must be either 'vertical' or 'reading_order'."
        )
    baselines, heights, textlines = order_lines(baselines, heights, textlines)

    # Undo the rotation applied at the beginning.
    textlines = [
        linepp.rotate_coords(textline, -rotation, (0, 0))
        for textline in textlines
    ]
    baselines = [
        linepp.rotate_coords(baseline, -rotation, (0, 0))
        for baseline in baselines
    ]

    # NOTE(review): a filter keeping only lines with
    # (width - self.stretch_lines) / height > 0.5 used to run on region.lines
    # at this point. region.lines is always empty here (it is cleared above
    # and only refilled below), so that filter never saw a line and was
    # removed as dead code. If short lines should be dropped, the filter has
    # to be applied to the lines produced by assign_lines_to_region.
    region = assign_lines_to_region(baselines, heights, textlines, region)
    return region
def process_page(self, img, page_layout: PageLayout):
    """Refine baselines, heights and polygons of the page's existing lines.

    Pages without any line are skipped with a printed warning. Otherwise:

    1. If ``self.adjust_baselines`` is set, baselines are replaced by the
       result of ``linepp.adjust_baselines_to_intensity``.
    2. If ``self.adjust_heights`` is set, line heights are re-estimated
       from the heights map of ``self.line_engine`` (no dilation is applied
       in this first pass).
    3. If the median line height divided by ``self.downsample`` is ``<= 6``
       or ``> 18``, the maps are inferred again with a downsample factor of
       ``max(1, int(height / 12 + 0.5))`` and, when ``self.adjust_heights``
       is set, heights are re-estimated from the dilated heights map. The
       previous downsample value is restored afterwards, also when an
       exception is raised.
    4. Every line polygon is rebuilt from its baseline and heights.

    Args:
        img: Page image; handed to the line engine and, for baseline
            adjustment, to ``linepp``.
        page_layout: Layout whose lines are refined in place.

    Returns:
        The ``page_layout`` object that was passed in.
    """
    if not list(page_layout.lines_iterator()):
        print(
            f"Warning: Skipping line reninement for page {page_layout.id}. No text lines present."
        )
        return page_layout

    def refine_heights(heights_map):
        """Set the heights of every line from ``heights_map``."""
        for line in page_layout.lines_iterator():
            # self.downsample is read on every use because it is temporarily
            # overridden while the maps are re-inferred below.
            baseline = line.baseline / self.downsample
            sample_points = linepp.resample_baselines(
                [baseline], num_points=40)[0]
            # ``np.int`` was an alias of the builtin ``int`` and has been
            # removed from NumPy (1.24); ``int`` selects the same dtype.
            second_coords = np.round(sample_points[:, 1]).astype(int)
            first_coords = np.round(sample_points[:, 0]).astype(int)
            heights_pred = self.line_engine.get_heights(
                heights_map, (second_coords, first_coords))
            line.heights = heights_pred * self.downsample

    def median_line_height():
        """Return the median of heights[0] + heights[1] over all lines."""
        return np.median([
            line.heights[0] + line.heights[1]
            for line in page_layout.lines_iterator()
        ])

    _, heights_map = self.line_engine.infer_maps(img)

    if self.adjust_baselines:
        baselines = [line.baseline for line in page_layout.lines_iterator()]
        baselines = linepp.adjust_baselines_to_intensity(baselines, img)
        for line, baseline in zip(page_layout.lines_iterator(), baselines):
            line.baseline = baseline

    if self.adjust_heights:
        refine_heights(heights_map)

    height = median_line_height()
    if height / self.downsample <= 6 or height / self.downsample > 18:
        temp_downsample = self.downsample
        print("ADAPT DOWNAMPLING", img.shape[0:2], self.downsample, height,
              height / self.downsample)
        self.downsample = max(1, int(height / 12 + 0.5))
        try:
            # The engine only uses the adapted factor for this one inference.
            self.line_engine.downsample = self.downsample
            try:
                _, heights_map = self.line_engine.infer_maps(img)
            finally:
                self.line_engine.downsample = temp_downsample

            if self.adjust_heights:
                # ``grey_dilation`` is called from ``ndimage`` directly; the
                # ``ndimage.morphology`` namespace is deprecated in SciPy.
                # Assumes ``ndimage`` is ``scipy.ndimage`` -- TODO confirm.
                heights_map = ndimage.grey_dilation(
                    heights_map, size=(11, 1, 1))
                refine_heights(heights_map)

            height = median_line_height()
            print(
                f"OPTIMAL DOWNAMPLING {img.shape[0] // self.downsample}:{img.shape[1] // self.downsample}",
                self.downsample, height, height / self.downsample)
        finally:
            # Never leave the adapted factor behind, even on failure.
            self.downsample = temp_downsample

    for line in page_layout.lines_iterator():
        line.polygon = linepp.baseline_to_textline(line.baseline, line.heights)
    return page_layout