def tagSeparatorRegion(lPages):
    """
    Tag each SeparatorRegion of every page as outside or inside a table.

    For each page in ``lPages``:

    1. every SeparatorRegion gets the default label ``lLabels_OI[0]``,
       stored in the attribute named by ``sDUSep``;
    2. every separator whose line intersects the polygon of a TableRegion
       is relabelled ``lLabels_OI[1]``;
    3. "fix bindings": a separator that is taller than wide and that
       intersects the horizontally shrunk polygon (``xfact=0.5``) of a
       TableCell whose ``rowSpan`` is greater than 6 is reset to
       ``lLabels_OI[0]``.

    The separator nodes are modified in place; nothing is returned.

    NOTE: a page without any TableRegion prints a message and ends the
    whole process through ``sys.exit(0)``, so later pages are not handled.
    """
    for page in lPages:
        lSeparators = MultiPageXml.getChildByName(page, 'SeparatorRegion')
        lTables = MultiPageXml.getChildByName(page, 'TableRegion')
        if lTables == []:
            # NOTE(review): reads sys.argv[1] directly, so this assumes the
            # function runs from a script started with a file argument.
            print("no table for %s" % sys.argv[1])
            sys.exit(0)

        # default O
        for sep in lSeparators:
            sep.set(sDUSep, lLabels_OI[0])

        # Geometries depend only on the page: build them once instead of
        # once per table.
        lPolygonTables = [
            ShapePo(MultiPageXml.getPointList(x)) for x in lTables
        ]
        lPolygonSep = [
            LineString(MultiPageXml.getPointList(x)) for x in lSeparators
        ]

        # A separator touching any table polygon is labelled as inside.
        for polyTable in lPolygonTables:
            table_prep = prep(polyTable)
            for sep, sepLine in zip(lSeparators, lPolygonSep):
                if table_prep.intersects(sepLine):
                    sep.set(sDUSep, lLabels_OI[1])

        ## fix bindings
        for table in lTables:
            # Only cells spanning more than 6 rows are considered.
            # NOTE(review): int(None) raises if a cell has no 'rowSpan'
            # attribute -- confirm the attribute is always present.
            lCells = [
                cell
                for cell in MultiPageXml.getChildByName(table, 'TableCell')
                if int(cell.get('rowSpan')) > 6
            ]
            lPolygonCells = [
                ShapePo(MultiPageXml.getPointList(x)) for x in lCells
            ]
            for polyCell in lPolygonCells:
                # Shrink the cell to half its width before testing.
                cell_prep = prep(scale(polyCell, xfact=0.5))
                for sep, sepLine in zip(lSeparators, lPolygonSep):
                    # Test the intersection first so that bounds are only
                    # read for geometries that actually intersect.
                    if not cell_prep.intersects(sepLine):
                        continue
                    minx, miny, maxx, maxy = sepLine.bounds
                    if (maxy - miny) > (maxx - minx):
                        # Taller than wide: back to the default label.
                        sep.set(sDUSep, lLabels_OI[0])
Пример #2
0
def tagSeparatorRegion(lPages):
    """
    Tag each SeparatorRegion of every page relative to the page's tables.

    For each page in ``lPages`` the attribute named by ``sDUSep`` is set on
    every SeparatorRegion node:

    1. all separators start with the default label ``lLabels_OI[0]``;
    2. for each TableRegion, separators whose buffered line (``buffer(10)``)
       intersects the table polygon get ``lLabels_OI[1]``; then separators
       intersecting one of the four geometries returned by
       ``defineTableBordersFromCells(table)`` get ``lLabels_OI[2]`` --
       taller-than-wide separators for the first two geometries
       (``col1``, ``colN``), wider-than-tall ones for the last two
       (``row1``, ``rowN``);
    3. "fix bindings": a taller-than-wide separator that intersects the
       horizontally shrunk polygon (``xfact=0.5``) of a TableCell whose
       ``rowSpan`` is greater than 6 is reset to ``lLabels_OI[0]``.

    The separator nodes are modified in place; nothing is returned.

    NOTE: a page without any TableRegion prints a message and ends the
    whole process through ``sys.exit(0)``, so later pages are not handled.
    """
    for page in lPages:
        lSeparators = MultiPageXml.getChildByName(page, 'SeparatorRegion')
        lTables = MultiPageXml.getChildByName(page, 'TableRegion')
        if lTables == []:
            # NOTE(review): reads sys.argv[1] directly, so this assumes the
            # function runs from a script started with a file argument.
            print("no table for %s" % sys.argv[1])
            sys.exit(0)

        # default O
        for sep in lSeparators:
            sep.set(sDUSep, lLabels_OI[0])

        # Buffered separator shapes depend only on the page; build them
        # once here rather than once per table. They are reused by the
        # "fix bindings" pass below.
        lPolygonSep = [
            ShapePo(LineString(MultiPageXml.getPointList(x)).buffer(10))
            for x in lSeparators
        ]

        for table in lTables:
            table_prep = prep(ShapePo(MultiPageXml.getPointList(table)))
            for sep, sepPoly in zip(lSeparators, lPolygonSep):
                if table_prep.intersects(sepPoly):
                    sep.set(sDUSep, lLabels_OI[1])

            # Disabled alternative kept as a note: give thickness to the
            # table's own border lines (getVerHorBorders + buffer(10)).
            col1, colN, row1, rowN = defineTableBordersFromCells(table)

            ## vertical borders
            # intersection of vertical separators with a vertical border
            for v in (col1, colN):
                v_prep = prep(v)
                for sep, sepPoly in zip(lSeparators, lPolygonSep):
                    # Intersection is tested first so that bounds are only
                    # read for geometries that actually intersect.
                    if not v_prep.intersects(sepPoly):
                        continue
                    minx, miny, maxx, maxy = sepPoly.bounds
                    if (maxy - miny) > (maxx - minx):
                        sep.set(sDUSep, lLabels_OI[2])

            ## horizontal borders
            for h in (row1, rowN):
                h_prep = prep(h)
                for sep, sepPoly in zip(lSeparators, lPolygonSep):
                    if not h_prep.intersects(sepPoly):
                        continue
                    minx, miny, maxx, maxy = sepPoly.bounds
                    if (maxy - miny) < (maxx - minx):
                        sep.set(sDUSep, lLabels_OI[2])

        ## fix bindings
        for table in lTables:
            # Only cells spanning more than 6 rows are considered.
            # NOTE(review): int(None) raises if a cell has no 'rowSpan'
            # attribute -- confirm the attribute is always present.
            lCells = [
                cell
                for cell in MultiPageXml.getChildByName(table, 'TableCell')
                if int(cell.get('rowSpan')) > 6
            ]
            lPolygonCells = [
                ShapePo(MultiPageXml.getPointList(x)) for x in lCells
            ]
            for polyCell in lPolygonCells:
                # Shrink the cell to half its width before testing.
                cell_prep = prep(scale(polyCell, xfact=0.5))
                for sep, sepPoly in zip(lSeparators, lPolygonSep):
                    if not cell_prep.intersects(sepPoly):
                        continue
                    minx, miny, maxx, maxy = sepPoly.bounds
                    if (maxy - miny) > (maxx - minx):
                        # Taller than wide: back to the default label.
                        sep.set(sDUSep, lLabels_OI[0])