def left_aligned_table(source):
    """Split a 1x1 table into rows and columns using left edges of items.

    The items of the single cell are sorted with ``sort_topdown_ltr``
    (presumably top-to-bottom, then left-to-right -- confirm against its
    definition).  Every item sharing the first item's ``y1()`` contributes
    its ``x1()`` as a column position.  Each item is then placed in the
    first column whose position ``pdftable.pretty_much_equal`` accepts for
    the item's ``x1()``.

    A line that leaves at least one column empty is treated as a
    continuation of the previous row and is merged into it cell by cell.

    Args:
        source: table-like object with exactly one row and one column.

    Returns:
        ``pdftable.ImplicitTable(bounds, table)`` where ``table`` is a list
        of rows and each row is a list of per-column item lists.

    Raises:
        AssertionError: if ``source`` is not a 1x1 table.
        IndexError: if the cell has no items, or if the very first line
            leaves a column empty (there is no previous row to merge into).
    """
    assert source.rows() == 1 and source.columns() == 1

    bounds = source.bounds()
    contents = source.get_at(0, 0)[:]
    contents.sort(key=cmp_to_key(sort_topdown_ltr))

    # The first line defines the column positions.
    first_y = contents[0].bounds().y1()
    columns = []
    for item in contents:
        item_bounds = item.bounds()
        if not pdftable.pretty_much_equal(item_bounds.y1(), first_y):
            break
        columns.append(item_bounds.x1())

    table = []

    def flush(cells):
        # An incomplete line continues the previous row; a complete one
        # starts a new row.
        if any(len(cell) == 0 for cell in cells):
            for i in range(len(columns)):
                table[-1][i] += cells[i]
        else:
            table.append(cells)

    last_y = first_y
    # Distinct lists per cell so no two cells can ever alias each other.
    row = [[] for _ in columns]
    for item in contents:
        item_bounds = item.bounds()
        if not pdftable.pretty_much_equal(item_bounds.y1(), last_y):
            flush(row)
            row = [[] for _ in columns]
            last_y = item_bounds.y1()

        item_x = item_bounds.x1()
        for i, column_x in enumerate(columns):
            if pdftable.pretty_much_equal(item_x, column_x):
                col_index = i
                break
        else:
            # Diagnostics kept from the original implementation.
            print(columns)
            print(contents)
            # Raising here was disabled because real documents reach this
            # branch.  Without a fallback the index of the *previous* item
            # would be reused and that item's cell overwritten, so pick
            # the column whose left edge is closest instead.
            # TODO: find out what pretty_much_equal() actually tolerates.
            col_index = min(
                range(len(columns)),
                key=lambda i: abs(item_x - columns[i]),
            )

        row[col_index] = [item]

    flush(row)

    return pdftable.ImplicitTable(bounds, table)
def __merge_text(self, lines):
    """Merge consecutive text lines that belong to the same paragraph.

    ``lines`` is sorted **in place** with ``sort_topdown_ltr`` and then
    scanned once.  A line is appended to the previously kept line when all
    of the following hold:

    * both start at (approximately) the same ``rect.x1()`` according to
      ``pdftable.pretty_much_equal``;
    * both report the same ``font_size()``;
    * ``line.approx_rect.y1() - last.approx_rect.y2()`` is below ``1.2``.

    When merging, a single space is inserted first unless the previous
    line ends in ``"-"`` or ``"/"``.

    Args:
        lines: list of text-line objects; it is reordered by this call and
            merged-into elements are mutated.

    Returns:
        The list of merged lines, or ``None`` when ``lines`` is empty.
    """
    if not lines:
        # Callers have always received None for empty input; keep that.
        return None

    lines.sort(key=cmp_to_key(sort_topdown_ltr))

    merged = [lines[0]]
    for line in lines[1:]:
        last = merged[-1]
        same_x = pdftable.pretty_much_equal(line.rect.x1(), last.rect.x1())
        same_size = last.font_size() == line.font_size()
        decent_descent = line.approx_rect.y1() - last.approx_rect.y2() < 1.2

        if same_x and same_size and decent_descent:
            last_char = last.chars[-1].get_text()[-1]
            # No separating space after a hyphen or slash.
            if last_char not in ("-", "/"):
                last.append_char(" ")
            last.append(line)
        else:
            merged.append(line)

    return merged
def __merge_text(self, lines):
    """Merge consecutive text lines that belong to the same paragraph.

    ``lines`` is sorted **in place** with ``sort_topdown_ltr`` and then
    scanned once.  A line is appended to the previously kept line when all
    of the following hold:

    * both start at (approximately) the same ``rect.x1()`` according to
      ``pdftable.pretty_much_equal``;
    * both report the same ``font_size()``;
    * ``line.approx_rect.y1() - last.approx_rect.y2()`` is below ``1.2``.

    When merging, a single space is inserted first unless the previous
    line ends in ``"-"`` or ``"/"``.

    Args:
        lines: list of text-line objects; it is reordered by this call and
            merged-into elements are mutated.

    Returns:
        The list of merged lines, or ``None`` when ``lines`` is empty.
    """
    # list.sort(cmp=...) only exists on Python 2; cmp_to_key works on
    # Python 2.7 and 3.x alike.
    from functools import cmp_to_key

    if not lines:
        # Callers have always received None for empty input; keep that.
        return None

    lines.sort(key=cmp_to_key(sort_topdown_ltr))

    merged = [lines[0]]
    for line in lines[1:]:
        last = merged[-1]
        same_x = pdftable.pretty_much_equal(line.rect.x1(), last.rect.x1())
        same_size = last.font_size() == line.font_size()
        decent_descent = line.approx_rect.y1() - last.approx_rect.y2() < 1.2

        if same_x and same_size and decent_descent:
            last_char = last.chars[-1].get_text()[-1]
            # No separating space after a hyphen or slash.
            if last_char not in ("-", "/"):
                last.append_char(" ")
            last.append(line)
        else:
            merged.append(line)

    return merged
def left_aligned_table(source):
    """Split a 1x1 table into rows and columns using left edges of items.

    The items of the single cell are sorted with ``sort_topdown_ltr``
    (presumably top-to-bottom, then left-to-right -- confirm against its
    definition).  Every item sharing the first item's ``y1()`` contributes
    its ``x1()`` as a column position.  Each item is then placed in the
    first column whose position ``pdftable.pretty_much_equal`` accepts for
    the item's ``x1()``.

    A line that leaves at least one column empty is treated as a
    continuation of the previous row and is merged into it cell by cell.

    Args:
        source: table-like object with exactly one row and one column.

    Returns:
        ``pdftable.ImplicitTable(bounds, table)`` where ``table`` is a list
        of rows and each row is a list of per-column item lists.

    Raises:
        AssertionError: if ``source`` is not a 1x1 table.
        IndexError: if the cell has no items, or if the very first line
            leaves a column empty (there is no previous row to merge into).
        Exception: ``"No matching column!"`` when an item's left edge
            matches none of the detected columns.
    """
    # list.sort(cmp=...) only exists on Python 2; cmp_to_key works on
    # Python 2.7 and 3.x alike.
    from functools import cmp_to_key

    assert source.rows() == 1 and source.columns() == 1

    bounds = source.bounds()
    contents = source.get_at(0, 0)[:]
    contents.sort(key=cmp_to_key(sort_topdown_ltr))

    # The first line defines the column positions.
    first_y = contents[0].bounds().y1()
    columns = []
    for item in contents:
        item_bounds = item.bounds()
        if not pdftable.pretty_much_equal(item_bounds.y1(), first_y):
            break
        columns.append(item_bounds.x1())

    table = []

    def flush(cells):
        # An incomplete line continues the previous row; a complete one
        # starts a new row.
        if any(len(cell) == 0 for cell in cells):
            for i in range(len(columns)):
                table[-1][i] += cells[i]
        else:
            table.append(cells)

    last_y = first_y
    # Distinct lists per cell so no two cells can ever alias each other.
    row = [[] for _ in columns]
    for item in contents:
        item_bounds = item.bounds()
        if not pdftable.pretty_much_equal(item_bounds.y1(), last_y):
            flush(row)
            row = [[] for _ in columns]
            last_y = item_bounds.y1()

        for i, column_x in enumerate(columns):
            if pdftable.pretty_much_equal(item_bounds.x1(), column_x):
                col_index = i
                break
        else:
            # Single-argument print(...) produces the same output under
            # the Python 2 print statement and the Python 3 function.
            print(columns)
            print(contents)
            raise Exception("No matching column!")

        row[col_index] = [item]

    flush(row)

    return pdftable.ImplicitTable(bounds, table)
def sort_text(a, b):
    """cmp-style comparator ordering text items by ``x1()``, then ``y1()``.

    Items whose ``rect.x1()`` values are considered equal by
    ``pdftable.pretty_much_equal`` are ordered by ``rect.y1()``; all other
    items are ordered by ``rect.x1()``.

    Returns:
        ``-1`` if ``a`` sorts before ``b``, ``1`` if it sorts after, and
        ``0`` when both share the same column position and ``y1()``.
    """
    if pdftable.pretty_much_equal(a.rect.x1(), b.rect.x1()):
        a_y = a.rect.y1()
        b_y = b.rect.y1()
        if a_y < b_y:
            return -1
        # A comparator must report 0 for ties and a positive value when
        # ``a`` comes after ``b``; returning them the other way round
        # makes the resulting order depend on the argument order.
        if a_y > b_y:
            return 1
        return 0

    if a.rect.x1() < b.rect.x1():
        return -1
    return 1
def center_aligned_table(source):
    """Split a 1x1 table into rows and columns using item centres.

    Items of the single cell are sorted with ``sort_topdown_ltr``.  The
    ``xmid()`` of every item on the first line becomes a column centre,
    and each item is assigned to the column whose centre is closest to its
    own ``xmid()`` (the first such column wins on ties).

    A line that leaves a column empty is merged, cell by cell, into the
    row before it; a line that fills every column starts a new row.

    Args:
        source: table-like object with exactly one row and one column.

    Returns:
        ``pdftable.ImplicitTable(bounds, table)`` built from the detected
        rows.
    """
    assert source.rows() == 1 and source.columns() == 1

    bounds = source.bounds()
    contents = source.get_at(0, 0)[:]
    contents.sort(key=cmp_to_key(sort_topdown_ltr))

    # Collect one centre per item on the first line.
    last_y = contents[0].bounds().y1()
    column_centers = []
    for entry in contents:
        if not pdftable.pretty_much_equal(last_y, entry.bounds().y1()):
            break
        column_centers.append(entry.bounds().xmid())

    column_count = len(column_centers)
    table = []

    def flush(cells):
        # Complete line -> new row; otherwise fold into the previous row.
        if all(len(cell) > 0 for cell in cells):
            table.append(cells)
            return
        previous = table[-1]
        for position, cell in enumerate(cells):
            previous[position] += cell

    row = [[]] * column_count
    for entry in contents:
        entry_bounds = entry.bounds()
        if not pdftable.pretty_much_equal(entry_bounds.y1(), last_y):
            flush(row)
            row = [[]] * column_count
            last_y = entry_bounds.y1()

        # Linear scan for the nearest column centre.
        nearest = None
        smallest_gap = float("inf")
        for position, center in enumerate(column_centers):
            gap = abs(entry_bounds.xmid() - center)
            if gap < smallest_gap:
                smallest_gap = gap
                nearest = position

        row[nearest] = [entry]

    flush(row)

    return pdftable.ImplicitTable(bounds, table)
def center_aligned_table(source):
    """Split a 1x1 table into rows and columns using item centres.

    Items of the single cell are sorted with ``sort_topdown_ltr``.  The
    ``xmid()`` of every item on the first line becomes a column centre,
    and each item is assigned to the column whose centre is closest to its
    own ``xmid()`` (the first such column wins on ties).

    A line that leaves a column empty is merged, cell by cell, into the
    row before it; a line that fills every column starts a new row.

    Args:
        source: table-like object with exactly one row and one column.

    Returns:
        ``pdftable.ImplicitTable(bounds, table)`` built from the detected
        rows.

    Raises:
        AssertionError: if ``source`` is not a 1x1 table.
        IndexError: if the cell has no items, or if the very first line
            leaves a column empty (there is no previous row to merge into).
    """
    # list.sort(cmp=...) and xrange only exist on Python 2; cmp_to_key and
    # range work on Python 2.7 and 3.x alike.
    from functools import cmp_to_key

    assert source.rows() == 1 and source.columns() == 1

    bounds = source.bounds()
    contents = source.get_at(0, 0)[:]
    contents.sort(key=cmp_to_key(sort_topdown_ltr))

    # Collect one centre per item on the first line.
    column_centers = []
    last_y = contents[0].bounds().y1()
    for item in contents:
        if not pdftable.pretty_much_equal(last_y, item.bounds().y1()):
            break
        column_centers.append(item.bounds().xmid())

    table = []

    def flush(cells):
        # An incomplete line continues the previous row; a complete one
        # starts a new row.
        if any(len(cell) == 0 for cell in cells):
            for i in range(len(column_centers)):
                table[-1][i] += cells[i]
        else:
            table.append(cells)

    # Distinct lists per cell so no two cells can ever alias each other.
    row = [[] for _ in column_centers]
    for item in contents:
        item_bounds = item.bounds()
        if not pdftable.pretty_much_equal(item_bounds.y1(), last_y):
            flush(row)
            row = [[] for _ in column_centers]
            last_y = item_bounds.y1()

        # Pick the column whose centre is nearest to this item's centre.
        col_index = None
        min_dist = float("inf")
        for i, center in enumerate(column_centers):
            distance = abs(item_bounds.xmid() - center)
            if distance < min_dist:
                min_dist = distance
                col_index = i

        row[col_index] = [item]

    flush(row)

    return pdftable.ImplicitTable(bounds, table)