示例#1
0
 def given_plane_with_one_object(object_size=50, gridsize=50):
     bounding_box = (0, 0, 100, 100)
     plane = Plane(bounding_box, gridsize)
     obj = LTComponent((0, 0, object_size, object_size))
     plane.add(obj)
     return plane, obj
示例#2
0
    def test_find_neighbors_vertical(self):
        laparams = LAParams()
        plane = Plane((0, 0, 50, 50))

        line = LTTextLineVertical(laparams.word_margin)
        line.set_bbox((4, 10, 6, 20))
        plane.add(line)

        bottom_aligned_right = LTTextLineVertical(laparams.word_margin)
        bottom_aligned_right.set_bbox((6, 10, 8, 15))
        plane.add(bottom_aligned_right)

        top_aligned_left = LTTextLineVertical(laparams.word_margin)
        top_aligned_left.set_bbox((2, 15, 4, 20))
        plane.add(top_aligned_left)

        centrally_aligned_overlapping = LTTextLineVertical(
            laparams.word_margin)
        centrally_aligned_overlapping.set_bbox((5, 13, 7, 17))
        plane.add(centrally_aligned_overlapping)

        not_aligned = LTTextLineVertical(laparams.word_margin)
        not_aligned.set_bbox((6, 0, 8, 5))
        plane.add(not_aligned)

        wrong_width = LTTextLineVertical(laparams.word_margin)
        wrong_width.set_bbox((6, 10, 10, 15))
        plane.add(wrong_width)

        neighbors = line.find_neighbors(plane, laparams.line_margin)
        self.assertCountEqual(
            neighbors,
            [
                line,
                bottom_aligned_right,
                top_aligned_left,
                centrally_aligned_overlapping,
            ],
        )
示例#3
0
class Sheet1 (object):
    cells = None
    text_layout = None
    column_edges = None
    row_edges = None


    def __init__(self):
        self.cells = Plane()
        self.text_layout = Plane()
        self.row_edges = {}
        self.column_edges = {}

    def add_cell (self, cell):
        self.cells.add(cell)


    def add_text (self, cell_text):
        self.text_layout.add(cell_text)
#        if cell_text.text[:3] == 'Oil': print cell_text.text, cell_text.bbox

    def add_column_edge (self, x_value):
        x = round(x_value,2)
        self.column_edges[x] = 1+ self.column_edges.get(x,0)

    def add_row_edge (self, y_value):
        y = round(y_value,2)
        self.row_edges[y] = 1+ self.row_edges.get(y,0)

    def add_line (self, bbox):
        if bbox[0]==bbox[2]:
            # vertical line
            self.add_column_edge(bbox[0])
        elif bbox[1]==bbox[3]:
            #horizontal line
            self.add_row_edge(bbox[1])
        else:
            print ('WARNING: non-orthogonal line found: %s'%bbox)

    def add_rect (self, bbox):
        self.add_column_edge(bbox[0])
        self.add_column_edge(bbox[2])
        self.add_row_edge(bbox[1])
        self.add_row_edge(bbox[3])

    def add_ltcontainer (self, obj, page_y_offset):
        #NB: row indexes (y axis) are negative!

        bbox = (
            round(obj.x0,2),
            round(-(obj.y1+page_y_offset),2),
            round(obj.x1,2),
            round(-(obj.y0+page_y_offset),2)
            )

        if isinstance (obj, LTTextLine):
            self.add_text (CellText(bbox, obj.get_text()))
        elif isinstance (obj, LTLine):
            self.add_line(bbox)
        elif isinstance (obj, LTRect):
            self.add_rect(bbox)
        elif isinstance (obj, LTContainer):
            for child in obj:
                self.add_ltcontainer (child, page_y_offset)
        else:
            pass

    def extract_rows (self):
#        for obj in self.text_layout.find((690, -1200, 800, -1000)):
#            print obj.bbox,obj.text

        row_bounds = sorted(self.row_edges)
        col_bounds = sorted(self.column_edges)

#        pprint.pprint(col_bounds)

        rows = []

        r0 = row_bounds[0] - 1 if row_bounds else 0

        #NB: row indexes (y axis) are negative!
        for r1 in row_bounds:
            if r1 - r0 < 1: continue

#            print r1-r0,r0,r1

            row=[]
            c0 = 0
            for c1 in col_bounds:
                if c1 - c0 < 1: continue

#                print c0,r0,c1,r1
                # get all text lines that intersect the bounds of this cell
                lines = [l for l in self.text_layout.find((c0,r0,c1,r1))]
                #sort from top to bottom
                lines = sorted(lines, key=lambda line: line.y0)

#                text = ' '.join([t.text.strip() for t in lines if t.x0 >= c0 and t.x0 <= c1])
#                if text[:10] == 'Production': print text,c0,r0,c1,r1
#                if text[:3] == 'Oil': print text,c0,r0,c1,r1

                # remove anything where the left edge is not inside the cell and concatenate the rest
                row.append(' '.join([t.text.strip() for t in lines if t.x0 >= c0 and t.x0 <= c1]))
                c0 = c1
            rows.append(row)
            r0 = r1
        return rows
示例#4
0
    def test_find_neighbors_horizontal(self):
        laparams = LAParams()
        plane = Plane((0, 0, 50, 50))

        line = LTTextLineHorizontal(laparams.word_margin)
        line.set_bbox((10, 4, 20, 6))
        plane.add(line)

        left_aligned_above = LTTextLineHorizontal(laparams.word_margin)
        left_aligned_above.set_bbox((10, 6, 15, 8))
        plane.add(left_aligned_above)

        right_aligned_below = LTTextLineHorizontal(laparams.word_margin)
        right_aligned_below.set_bbox((15, 2, 20, 4))
        plane.add(right_aligned_below)

        centrally_aligned_overlapping = LTTextLineHorizontal(
            laparams.word_margin)
        centrally_aligned_overlapping.set_bbox((13, 5, 17, 7))
        plane.add(centrally_aligned_overlapping)

        not_aligned = LTTextLineHorizontal(laparams.word_margin)
        not_aligned.set_bbox((0, 6, 5, 8))
        plane.add(not_aligned)

        wrong_height = LTTextLineHorizontal(laparams.word_margin)
        wrong_height.set_bbox((10, 6, 15, 10))
        plane.add(wrong_height)

        neighbors = line.find_neighbors(plane, laparams.line_margin)
        self.assertCountEqual(
            neighbors,
            [
                line,
                left_aligned_above,
                right_aligned_below,
                centrally_aligned_overlapping,
            ],
        )