Exemplo n.º 1
0
def shakeRectangleShapes(pageNd, lClusterPoly, dCellCndPerRow, dTextInCells):
    """
        Recompute the shape of each row after ignoring its first cell.

        For every key of dCellCndPerRow (visited in sorted order), the text
        nodes registered in dTextInCells for all the cells but the first one
        are collected, and their minimum rotated rectangle is computed.
        - If that rectangle is not empty and the extent of its bounds along y
          is strictly smaller than the one of lClusterPoly[key], a 'COL'
          element is appended to pageNd, and the rectangle, scaled with
          xfact=2, is both stored in its 'points' attribute and kept as the
          new shape.
        - Otherwise lClusterPoly[key] is scaled with xfact=1.5, written back
          into lClusterPoly (the input list is modified in place) and kept as
          the new shape.

        Idea not implemented yet: compare angles rather than heights, e.g.
        take the longest line of the polygon and compute its angle.

        return the list of new shapes, one per key of dCellCndPerRow
    """
    ## can be restricted to ones with high angle?
    lNewPoly = []
    for iRow in sorted(dCellCndPerRow):
        # text nodes of the row, the first cell being skipped
        lTextNd = []
        for cell in dCellCndPerRow[iRow][1:]:
            if cell.wkt not in dTextInCells:
                continue
            for ndText in dTextInCells[cell.wkt]:
                assert ShapeLoader.node_to_Polygon(ndText).is_valid
                lTextNd.append(ndText)

        rectangle = ShapeLoader.minimum_rotated_rectangle(ltl := lTextNd,
                                                          bShapelyObject=True) \
            if False else ShapeLoader.minimum_rotated_rectangle(
                lTextNd, bShapelyObject=True)

        # does the reduced content give a non-empty and less high shape?
        bUseRectangle = False
        if not rectangle.is_empty:
            fRectHeight = abs(rectangle.bounds[1] - rectangle.bounds[3])
            fRowHeight = abs(lClusterPoly[iRow].bounds[1] -
                             lClusterPoly[iRow].bounds[3])
            bUseRectangle = fRectHeight < fRowHeight

        if bUseRectangle:
            ndCol = etree.Element('COL')
            pageNd.append(ndCol)
            polyNew = scale(rectangle, xfact=2)
            lNewPoly.append(polyNew)
            ndCol.set('points', ShapeLoader.getCoordsString(polyNew))
        else:
            # keep the original row shape, widened
            lClusterPoly[iRow] = scale(lClusterPoly[iRow], xfact=1.5)
            lNewPoly.append(lClusterPoly[iRow])
        ## get length and compute angle !!!  take the one with more horizontal angle::

    return lNewPoly
Exemplo n.º 2
0
 def computeShape(cls, ndPage, setID, bConvexHull=False):
     """
     Build the shape enclosing the content of this cluster.

     Each id of setID is searched below ndPage by an XPath test on the @id
     attribute, and the first match is kept (an id without any match makes
     the [0] indexing fail).
     The collected nodes are then given to ShapeLoader:
     - convex_hull when bConvexHull is true
     - minimum_rotated_rectangle otherwise (the default)
     In both cases bShapelyObject=True is passed.
     """
     # let's find the nodes of the cluster
     lNdContent = []
     for sId in setID:
         lNdMatch = ndPage.xpath(".//*[@id='%s']" % sId, namespaces=dNS)
         lNdContent.append(lNdMatch[0])

     # and compute the requested shape
     if bConvexHull:
         return ShapeLoader.convex_hull(lNdContent, bShapelyObject=True)
     return ShapeLoader.minimum_rotated_rectangle(lNdContent,
                                                  bShapelyObject=True)
    def addClusterToDom(self,
                        lCluster,
                        bMoveContent=False,
                        sAlgo="",
                        pageNode=None):
        """
        Create one Cluster element per cluster and append it to the page node.

        lCluster: sequence of clusters, each one being a collection of indices
            in self.lNode. The position of a cluster in lCluster becomes the
            "name" attribute of its element.
        bMoveContent: when true, the DOM nodes of the members are moved under
            their Cluster element.
        sAlgo: value of the "algo" attribute of every Cluster element.
        pageNode: node receiving the Cluster elements. When None, the page
            node of the first member of the first cluster is taken, and an XML
            comment is appended to it before the clusters. (If that first
            cluster is empty, pageNode stays None and the append fails.)

        Each Cluster element gets a "content" attribute (space separated ids
        of its members) and a Coords child whose "points" attribute is the
        minimum rotated rectangle of the members.

        return the list of created Cluster nodes
        """
        lNdCluster = []
        for iCluster, lIdx in enumerate(lCluster):
            if pageNode is None:
                # the page is the one of the first member we meet
                for iFirst in lIdx:
                    pageNode = self.lNode[iFirst].page.node
                    break
                ndComment = etree.Comment(
                    "\nClusters created by the conjugate graph\n")
                pageNode.append(ndComment)

            # DOM nodes of the members of this cluster
            lNdMember = [self.lNode[_i].node for _i in lIdx]

            ndCluster = PageXml.createPageXmlNode('Cluster')
            ndCluster.set("name", str(iCluster))
            ndCluster.set("algo", sAlgo)
            # the space separated list of node ids
            sContent = " ".join(nd.get("id") for nd in lNdMember)
            ndCluster.set("content", sContent)

            # shape of the cluster
            ndCoords = PageXml.createPageXmlNode('Coords')
            ndCluster.append(ndCoords)
            ndCoords.set('points',
                         ShapeLoader.minimum_rotated_rectangle(lNdMember))

            pageNode.append(ndCluster)
            ndCluster.tail = "\n"

            if bMoveContent:
                # move the DOM node of the content to the cluster
                for ndMember in lNdMember:
                    ndCluster.append(ndMember)

            lNdCluster.append(ndCluster)

        return lNdCluster
Exemplo n.º 4
0
    def addClusterToDom(self, lCluster, sAlgo=""):
        """
        From the predicted clusters, we create new TextRegions.

        When options.bEvalRegion is set, the parent class implementation is
        called instead, so that the original TextRegion and the produced
        Cluster elements are both kept for evaluation.

        Otherwise, on the page of self.lNode[0]:
        - the "type" attribute of each existing TextRegion is copied onto its
          inner TextLine elements as "type_gt";
        - any existing ReadingOrder element is removed and a new one, with a
          single OrderedGroup, is appended to the page (a page without
          ReadingOrder is accepted);
        - one TextRegion is created per cluster, with a Coords child (minimum
          rotated rectangle of the members), an optional type voted from the
          "type" attribute of the members, and the member nodes moved into it;
          it is registered in the new reading order at the cluster's position;
        - the TextRegion elements that existed before are removed.

        lCluster: sequence of clusters, each one being a collection of indices
            in self.lNode
        sAlgo: only used in evaluation mode, where it is forwarded to the
            parent implementation

        return the list of created TextRegion nodes
        """
        if hasattr(options, "bEvalRegion") and options.bEvalRegion:
            # in order to evaluate, we must keep the original TextRegion and the Cluster elements that are produced
            return super(My_ConjugateSegmenterGraph_MultiSinglePageXml,
                         self).addClusterToDom(lCluster, sAlgo=sAlgo)

        lNdCluster = []
        dNS = {
            "pc":
            "http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15"
        }

        # the graph has been constructed for a certain page
        pageNode = self.lNode[0].page.node

        # enumerate the TextRegions to remove, BEFORE creating the new ones
        lNdOldTextRegion = pageNode.xpath(".//pc:TextRegion", namespaces=dNS)

        # we copy the type attribute to the inner TextLine, to preserve the GT info, if any
        # NOTE(review): a TextRegion without "type" yields type_gt="None"
        # (str(None)); kept as is since consumers of type_gt are not visible here.
        for ndTR in lNdOldTextRegion:
            sType = str(ndTR.get("type"))
            for ndTL in ndTR.xpath(".//pc:TextLine", namespaces=dNS):
                ndTL.set("type_gt", sType)

        # replace the ReadingOrder section by a new one
        # The old section is looked up among all descendants, so it is detached
        # from its actual parent (same way as the old TextRegions below), and
        # its absence is not an error.
        for ndOldReadingOrder in pageNode.xpath(".//pc:ReadingOrder",
                                                namespaces=dNS):
            ndOldReadingOrder.getparent().remove(ndOldReadingOrder)
        ndReadingOrder = PageXml.createPageXmlNode('ReadingOrder')
        ndOrderedGroup = PageXml.createPageXmlNode('OrderedGroup')
        ndOrderedGroup.set("id", "ro_1")
        ndOrderedGroup.set("caption", "Regions reading order")
        ndReadingOrder.append(ndOrderedGroup)
        pageNode.append(ndReadingOrder)

        # loop over clusters
        for ic, c in enumerate(lCluster):
            ndCluster = PageXml.createPageXmlNode('TextRegion')
            # ids are 1-based, reading order indices are 0-based
            scid = "cluster_%d" % (ic + 1)
            ndCluster.set("id", scid)
            ndCluster.set("custom", "readingOrder {index:%d;}" % ic)

            # DOM nodes of the members of this cluster
            lNdMember = [self.lNode[_i].node for _i in c]

            # TextRegion bounding box
            coords = PageXml.createPageXmlNode('Coords')
            ndCluster.append(coords)
            spoints = ShapeLoader.minimum_rotated_rectangle(lNdMember)
            coords.set('points', spoints)

            # if the inner TextLine are tagged, let's do a vote to tag the Cluster
            dType = Counter(sType
                            for sType in (nd.get('type') for nd in lNdMember)
                            if sType is not None)
            mc = dType.most_common(1)
            if mc:
                sXmlLabel = mc[0][0]
                ndCluster.set("type", sXmlLabel)
                PageXml.setCustomAttr(ndCluster, "structure", "type",
                                      sXmlLabel)

            #TextLine: move the DOM node of the content to the cluster
            for ndMember in lNdMember:
                ndCluster.append(ndMember)

            pageNode.append(ndCluster)
            ndCluster.tail = "\n"
            lNdCluster.append(ndCluster)

            # register the new region in the reading order
            ndRegionRefIndexed = PageXml.createPageXmlNode('RegionRefIndexed')
            ndRegionRefIndexed.set("index", str(ic))
            ndRegionRefIndexed.set("regionRef", scid)
            ndRegionRefIndexed.tail = "\n"
            ndOrderedGroup.append(ndRegionRefIndexed)

        # remove the old TextRegion, whatever their parent is
        for nd in lNdOldTextRegion:
            nd.getparent().remove(nd)

        return lNdCluster