def addEdgeToDoc(self, Y=None):
    """
    To display the graph conveniently we add new Edge elements.

    For each edge of self.lEdge, one "Edge" node is appended to the first
    page node yielded by self._iter_Page_DocNode(self.doc). The node gets the
    edge class name in its DU_type attribute and a 2-point polyline (A, B):
    - HorizontalEdge, VerticalEdge: A and B are the centroids of the shapes
      of edge.A and edge.B
    - Edge_BL: A is the centroid of the shape of edge.A, B is the point of
      the shape of edge.B nearest to the shape of edge.A
    - any other edge type: A and B are the points of each shape nearest to a
      reference point placed at y=0; the reference point moves to the right,
      by imageWidth / (1 + number of such edges), for each such edge.

    Y is not used.
    Returns None.
    """
    # only the page node is needed from the (pnum, page, ndPage) triplet
    _, _, ndPage = next(self._iter_Page_DocNode(self.doc))
    w = int(ndPage.get("imageWidth"))

    # NOTE(review): exact type tests are kept as is; isinstance would also
    # match subclasses and the edge class hierarchy is not visible here.
    nn = 1 + sum(1 for e in self.lEdge
                 if type(e) not in [HorizontalEdge, VerticalEdge, Edge_BL])

    ii = 0  # number of "other" edges seen so far
    for edge in self.lEdge:
        if type(edge) in [HorizontalEdge, VerticalEdge]:
            A, B = edge.A.shape.centroid, edge.B.shape.centroid
        elif type(edge) in [Edge_BL]:
            A = edge.A.shape.centroid
            _pt, B = shapely.ops.nearest_points(edge.A.shape, edge.B.shape)
        else:
            ii += 1
            x = 1 + ii * (w / nn)
            pt = geom.Point(x, 0)
            A, _ = shapely.ops.nearest_points(edge.A.shape, pt)
            B, _ = shapely.ops.nearest_points(edge.B.shape, pt)

        ndSep = MultiPageXml.createPageXmlNode("Edge")
        ndSep.set("DU_type", type(edge).__name__)
        ndPage.append(ndSep)
        MultiPageXml.setPoints(ndSep, [(A.x, A.y), (B.x, B.y)])

    return
def addSeparator(root, lCells):
    """
    Add separators that correspond to cell boundaries; modifies the XML DOM.

    For each row, then each column separator returned by
    getCellsSeparators(lCells), in increasing index order, a SeparatorRegion
    node with a Coords child is appended to the first TableRegion returned by
    MultiPageXml.getChildByName(root, 'TableRegion'). The node carries the
    separator angle (in degrees) in its "orient" attribute and its index in
    its "row" or "col" attribute.
    After the rows, and after the columns, a comment holding statistics on
    the angles is appended to the TableRegion and traced. No statistics are
    produced for an empty set of separators.

    root   : DOM node under which the TableRegion is looked up
    lCells : the cells, passed as is to getCellsSeparators

    Raises TableAnnotationException if there is no TableRegion.
    Returns None.
    """
    dRow, dCol = getCellsSeparators(lCells)

    try:
        ndTR = MultiPageXml.getChildByName(root, 'TableRegion')[0]
    except IndexError:
        raise TableAnnotationException("No TableRegion!!! ")

    def _atan_degrees(fNum, fDen):
        """
        math.degrees(math.atan(fNum / fDen)), without ZeroDivisionError:
        for fDen == 0 the angle is 90 degrees with the sign of fNum, and 0
        for a degenerate segment (fNum == fDen == 0).
        """
        if fDen == 0:
            return math.copysign(90.0, fNum) if fNum else 0.0
        return math.degrees(math.atan(fNum / fDen))

    def _append_separators(dSep, sIndexAttr, sOrientFmt, fun_angle):
        """
        Append one SeparatorRegion per entry of dSep, sorted by index.
        Return the list of separator angles.
        """
        lAngle = []
        for index in sorted(dSep.keys()):
            (x1, y1), (x2, y2) = dSep[index]
            fAngle = fun_angle(x1, y1, x2, y2)
            lAngle.append(fAngle)

            ndSep = MultiPageXml.createPageXmlNode("SeparatorRegion")
            ndSep.set("orient", sOrientFmt % fAngle)
            ndSep.set(sIndexAttr, "%d" % index)
            ndTR.append(ndSep)

            ndCoord = MultiPageXml.createPageXmlNode("Coords")
            MultiPageXml.setPoints(ndCoord, [(x1, y1), (x2, y2)])
            ndSep.append(ndCoord)
        return lAngle

    def _append_stat(sStatFmt, lAngle):
        """Append the angle statistics as a comment and trace them."""
        if not lAngle:
            # min() and max() raise ValueError on an empty list
            return
        sStat = sStatFmt % (np.average(lAngle), np.std(lAngle),
                            min(lAngle), max(lAngle))
        ndTR.append(etree.Comment(sStat))
        traceln(sStat)

    # rows: angle of the separator with the horizontal axis
    lB = _append_separators(
        dRow, "row", "horizontal angle=%.2f",
        lambda x1, y1, x2, y2: _atan_degrees(y2 - y1, x2 - x1))
    _append_stat(
        "\tHORIZONTAL: Average=%.1f° stdev=%.2f° min=%.1f° max=%.1f°", lB)

    # columns: 90 degrees minus the angle with the vertical axis
    lB = _append_separators(
        dCol, "col", "vertical %.2f",
        lambda x1, y1, x2, y2: 90 - _atan_degrees(x2 - x1, y2 - y1))
    _append_stat(
        "\tVERTICAL : Average=%.1f° stdev=%.2f° min=%.1f° max=%.1f°", lB)

    return
def addPageXmlSeparator(cls, nd, sLabel, x1, y1, x2, y2, domid):
    """
    Append to nd a CutSeparator node for the segment (x1, y1) - (x2, y2),
    and return the new node.

    The node gets:
    - a "type" attribute set to sLabel, unless sLabel is None
    - an "orient" attribute: "0" when the segment is wider than high,
      "90" otherwise
    - an "id" attribute built from domid
    - a Coords child holding the two end points
    """
    ndCut = MultiPageXml.createPageXmlNode("CutSeparator")

    if sLabel is not None:
        # propagate the groundtruth info we have
        ndCut.set("type", sLabel)

    bWiderThanHigh = abs(x2 - x1) > abs(y2 - y1)
    ndCut.set("orient", "0" if bWiderThanHigh else "90")
    ndCut.set("id", "s_%d" % domid)
    nd.append(ndCut)

    ndPoints = MultiPageXml.createPageXmlNode("Coords")
    MultiPageXml.setPoints(ndPoints, [(x1, y1), (x2, y2)])
    ndCut.append(ndPoints)

    return ndCut
def addPageXmlSeparator(cls, ndPage, oCut, domid):
    """
    Append to ndPage a CutSeparator node describing oCut, and return the
    new node.

    The attributes of the node are read from oCut (_du_label, _du_angle
    converted with math.degrees, _du_angle_freq, _du_angle_cumfreq,
    _du_set_support); "orient" is always "0" and "id" is built from domid.
    A Coords child holds oCut.coords.
    """
    ndCut = MultiPageXml.createPageXmlNode("CutSeparator")

    # propagate the groundtruth info we have
    lAttribute = [
        ("DU_type", oCut._du_label),
        ("orient", "0"),
        ("DU_angle", "%.1f" % math.degrees(oCut._du_angle)),
        ("DU_angle_freq", "%.3f" % oCut._du_angle_freq),
        ("DU_angle_cumul_freq", "%.3f" % oCut._du_angle_cumfreq),
        ("DU_set_support", "%s" % oCut._du_set_support),
        ("id", "cs_%d" % domid),
    ]
    for sName, sValue in lAttribute:
        ndCut.set(sName, sValue)
    ndPage.append(ndCut)

    ndPoints = MultiPageXml.createPageXmlNode("Coords")
    MultiPageXml.setPoints(ndPoints, oCut.coords)
    ndCut.append(ndPoints)

    return ndCut
def addPageXmlSeparator(nd, i, lGTi, x1, y1, x2, y2, domid):
    """
    Append to nd a GridSeparator node for the segment (x1, y1) - (x2, y2),
    and return the new node.

    The node gets:
    - a "type" attribute set to self.getLabel(i, lGTi), when lGTi is truthy
    - an "orient" attribute: "0" when the segment is wider than high,
      "90" otherwise
    - an "id" attribute built from domid
    - a Coords child holding the two end points

    NOTE(review): `self` is not a parameter of this function; presumably it
    is resolved from an enclosing scope - confirm.
    """
    ndGrid = MultiPageXml.createPageXmlNode("GridSeparator")

    if lGTi:
        # propagate the groundtruth info we have
        ndGrid.set("type", self.getLabel(i, lGTi))

    bWiderThanHigh = abs(x2 - x1) > abs(y2 - y1)
    ndGrid.set("orient", "0" if bWiderThanHigh else "90")
    ndGrid.set("id", "s_%d" % domid)
    nd.append(ndGrid)

    ndPoints = MultiPageXml.createPageXmlNode("Coords")
    MultiPageXml.setPoints(ndPoints, [(x1, y1), (x2, y2)])
    ndGrid.append(ndPoints)

    return ndGrid
def addEdgeToDOM(self):
    """
    To display the graph conveniently we add new Edge elements.
    Since we change the Baseline representation, we show the new one.

    The parent implementation is called first. Then, for each node of
    self.lNode whose type name is "row", the points of its first pc:Baseline
    descendant are replaced by the coordinates of the object that
    self.shaper_fun returns for that Baseline.
    A node type name other than "row" or "sepH" fails an assertion.
    """
    super().addEdgeToDOM()

    for blk in self.lNode:
        sTypeName = blk.type.name
        assert sTypeName in ["row", "sepH"], sTypeName
        if sTypeName != "row":
            continue

        lndBaseline = blk.node.xpath(".//pc:Baseline", namespaces=self.dNS)
        ndBaseline = lndBaseline[0]
        oNewBaseline = self.shaper_fun(ndBaseline)
        MultiPageXml.setPoints(ndBaseline, list(oNewBaseline.coords))

    return
def addSeparator(root, lCells):
    """
    Add separators that correspond to cell boundaries; modifies the XML DOM.

    The border segments of each cell are grouped per row / column separator
    index: top segments go to separator `row`, bottom ones to
    `row + rowSpan`, left ones to `col`, right ones to `col + colSpan`.
    A line is then fitted to the end points of each group by linear
    regression, each end point being weighted by the length of its segment.
    For each line, a SeparatorRegion node with a Coords child is appended to
    the first TableRegion returned by
    MultiPageXml.getChildByName(root, 'TableRegion'); its "orient" attribute
    holds the intercept and slope of the fit.
    After the horizontal separators, and after the vertical ones, a comment
    holding statistics on the slopes (in percent) is appended to the
    TableRegion and printed. No statistics are produced when there is no
    separator.

    A cell for which partitionSegmentTopRightBottomLeft raises ValueError is
    ignored, as is a separator index that collected no segment.

    root   : DOM node under which the TableRegion is looked up
    lCells : cell nodes with "row", "col", "rowSpan", "colSpan" attributes
             and a Coords child in the PageXml namespace

    Raises IndexError if a cell has no Coords child or if there is no
    TableRegion.
    Returns None.
    """
    # let's collect the segments forming the separators
    dRowSep_lSgmt = collections.defaultdict(list)
    dColSep_lSgmt = collections.defaultdict(list)
    for cell in lCells:
        row, col, rowSpan, colSpan = [
            int(cell.get(sProp))
            for sProp in ("row", "col", "rowSpan", "colSpan")
        ]
        coord = cell.xpath("./a:Coords",
                           namespaces={"a": MultiPageXml.NS_PAGE_XML})[0]
        plgn = Polygon.parsePoints(coord.get('points'))
        try:
            lT, lR, lB, lL = plgn.partitionSegmentTopRightBottomLeft()
        except ValueError:
            # best effort: this cell does not contribute to any separator
            continue
        # the top segments contribute to the row separator of index: row
        dRowSep_lSgmt[row].extend(lT)
        # the bottom segments contribute to the one of index: row+rowSpan
        dRowSep_lSgmt[row + rowSpan].extend(lB)
        dColSep_lSgmt[col].extend(lL)
        dColSep_lSgmt[col + colSpan].extend(lR)

    ndTR = MultiPageXml.getChildByName(root, 'TableRegion')[0]

    def _regress(lSegment, bVertical):
        """
        Fit v = a + b * u on the end points of the segments, where (u, v) is
        (x, y) for a horizontal separator and (y, x) for a vertical one.
        Return a, b and the two end points of the fitted line as (x, y).
        """
        lU, lV, lW = [], [], []
        for x1, y1, x2, y2 in lSegment:
            # weight of both end points = length of the segment
            # (the norm of [[x1, y1], [x2, y2]] would be the Frobenius norm
            #  of the coordinates, which grows with the distance to origin)
            fLength = np.linalg.norm([x2 - x1, y2 - y1])
            if bVertical:
                lU.extend((y1, y2))
                lV.extend((x1, x2))
            else:
                lU.extend((x1, x2))
                lV.extend((y1, y2))
            lW.extend((fLength, fLength))

        # coefficients are returned from low to high degree
        a, b = np.polynomial.polynomial.polyfit(lU, lV, 1, w=lW)

        umin, umax = min(lU), max(lU)
        v1 = a + b * umin
        v2 = a + b * umax
        if bVertical:
            return a, b, [(v1, umin), (v2, umax)]
        return a, b, [(umin, v1), (umax, v2)]

    def _append_separators(dSep_lSgmt, sOrientFmt, bVertical):
        """
        Append one SeparatorRegion per non-empty group of segments.
        Return the list of slopes, in percent.
        """
        lfSlope = []
        for lSegment in dSep_lSgmt.values():
            if not lSegment:
                # nothing to fit: polyfit rejects an empty vector
                continue
            a, b, lPoint = _regress(lSegment, bVertical)
            lfSlope.append(b * 100)

            ndSep = MultiPageXml.createPageXmlNode("SeparatorRegion")
            ndSep.set("orient", sOrientFmt % (a, b))
            ndTR.append(ndSep)

            ndCoord = MultiPageXml.createPageXmlNode("Coords")
            MultiPageXml.setPoints(ndCoord, lPoint)
            ndSep.append(ndCoord)
        return lfSlope

    def _append_stat(sStatFmt, lfSlope):
        """Append the slope statistics as a comment and print them."""
        if not lfSlope:
            # min() and max() raise ValueError on an empty list
            return
        sStat = sStatFmt % (np.average(lfSlope), np.std(lfSlope),
                            min(lfSlope), max(lfSlope))
        ndTR.append(etree.Comment(sStat))
        print(sStat)

    lB = _append_separators(dRowSep_lSgmt, "horizontal %.1f %.3f", False)
    _append_stat(
        "\tHORIZONTAL: Average=%.1f%% stdev=%.2f%% min=%.1f%% max=%.1f%%", lB)

    lB = _append_separators(dColSep_lSgmt, "vertical %.1f %.3f", True)
    _append_stat(
        "\tVERTICAL : Average=%.1f%% stdev=%.2f%% min=%.1f%% max=%.1f%%", lB)

    return