def _generate_table(self, table_idx, cols, rows, **kwargs):
    """Build a stream-flavor ``Table`` and populate it with page text.

    A ``Table`` is created from ``cols`` and ``rows`` with all edges set.
    Every text object in ``self.t_bbox["horizontal"]`` is mapped to a cell
    with ``get_table_index`` and its text written into that cell. Parsing
    metadata (accuracy, whitespace, order) and the plotting helpers are
    then attached to the table.

    Parameters
    ----------
    table_idx : int
        Index of the table; stored as ``table.order = table_idx + 1``.
    cols
        Passed unchanged to ``Table(cols, rows)``.
    rows
        Passed unchanged to ``Table(cols, rows)``.
    **kwargs
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    Table
        The populated table. ``table.page`` is not set here.
    """
    table = Table(cols, rows)
    table = table.set_all_edges()

    pos_errors = []
    # TODO: have a single list in place of two directional ones?
    # sorted on x-coordinate based on reading order i.e. LTR or RTL
    for direction in ["horizontal"]:
        for t in self.t_bbox[direction]:
            indices, error = get_table_index(
                table,
                t,
                direction,
                split_text=self.split_text,
                flag_size=self.flag_size,
                strip_text=self.strip_text,
            )
            # ``indices`` is a sequence of (row, col, text) triples, so the
            # "no cell found" marker (-1, -1) must be read from the first
            # triple. Comparing the sliced sequence itself with ``(-1, -1)``
            # is never equal, which let unplaced text be counted in the
            # accuracy and written to ``table.cells[-1][-1]``.
            if indices and tuple(indices[0][:2]) == (-1, -1):
                continue
            pos_errors.append(error)
            for r_idx, c_idx, text in indices:
                table.cells[r_idx][c_idx].text = text
    accuracy = compute_accuracy([[100, pos_errors]])

    data = table.data
    table.df = pd.DataFrame(data)
    table.shape = table.df.shape

    whitespace = compute_whitespace(data)
    table.flavor = "stream"
    table.accuracy = accuracy
    table.whitespace = whitespace
    table.order = table_idx + 1
    # table.page = int(os.path.basename(self.rootname).split("page-")[-1])

    # for plotting
    table._text = [
        (t.left, t.bottom, t.right, t.top) for t in self.horizontal_text
    ]
    table._image = None
    table._segments = None
    table._textedges = self.textedges

    return table
def _make_table(page, order):
    """Return an empty ``Table`` tagged with ``page`` and ``order``.

    The table is constructed with empty column and row lists; only its
    ``page`` and ``order`` attributes are assigned.
    """
    empty_table = Table([], [])
    # Targets are assigned left to right: page first, then order.
    empty_table.page, empty_table.order = page, order
    return empty_table