Exemplo n.º 1
0
def get_average_line_height(top_bottoms):
    """Return the mean height of the lines that are not unusually short.

    Each item of ``top_bottoms`` is a ``(top, bottom)`` pair; a line's
    height is ``bottom - top``.  Lines shorter than half of the tallest
    line get weight 0, so only the remaining lines contribute to the
    average.  Note this is a filtered mean, not a median.

    Raises:
        ValueError: if ``top_bottoms`` is empty.
    """
    heights = numpy.array([bottom - top for top, bottom in top_bottoms])
    if heights.size == 0:
        # numpy.max would fail here anyway, but with an opaque message.
        raise ValueError("top_bottoms must contain at least one line")
    tallest = numpy.max(heights)
    # Build the 0/1 weights with one vectorised comparison rather than
    # calling a Python closure per element through numpy.vectorize.
    weights = numpy.where(heights < (tallest / 2), 0, 1)
    return numpy.average(heights, weights=weights)
Exemplo n.º 2
0
def get_average_line_height(top_bottoms):
    """Return the mean height of the lines that are not unusually short.

    Each item of ``top_bottoms`` is a ``(top, bottom)`` pair; a line's
    height is ``bottom - top``.  Lines shorter than half of the tallest
    line get weight 0, so only the remaining lines contribute to the
    average.  Note this is a filtered mean, not a median.

    Raises:
        ValueError: if ``top_bottoms`` is empty.
    """
    heights = numpy.array([bottom - top for top, bottom in top_bottoms])
    if heights.size == 0:
        # numpy.max would fail here anyway, but with an opaque message.
        raise ValueError("top_bottoms must contain at least one line")
    tallest = numpy.max(heights)

    # Build the 0/1 weights with one vectorised comparison rather than
    # calling a Python closure per element through numpy.vectorize.
    weights = numpy.where(heights < (tallest / 2), 0, 1)
    return numpy.average(heights, weights=weights)
Exemplo n.º 3
0
    def calc_bounding_boxes(self):
        """Compute bounding boxes of the connected components.

        Labels a copy of ``self.inverted``, collects the component
        rectangles into ``self.boxes``, derives ``self.avgheight`` from
        the box heights and finally filters ``self.boxes`` through
        ``strip_non_chars``.
        """
        labelled = iulib.intarray()
        labelled.copy(self.inverted)
        iulib.label_components(labelled, False)
        rects = iulib.rectarray()
        iulib.bounding_boxes(rects, labelled)

        self.boxes = []
        for idx in range(rects.length()):
            # A box covering more than 95% of the image area is skipped.
            covers_page = rects.at(idx).area() > (
                self.inverted.dim(0) * self.inverted.dim(1) * 0.95)
            if not covers_page:
                self.boxes.append(i2r(rects.at(idx)))

        # Average text height: trimmed_mean over the sorted box heights.
        # NOTE(review): presumably the 5, 5 arguments trim a percentage
        # from each end -- confirm against trimmed_mean.
        heights = numpy.array([box.height() for box in self.boxes])
        self.avgheight = trimmed_mean(numpy.sort(heights), 5, 5)

        # Let strip_non_chars drop boxes that do not look like characters
        # relative to the average height.
        self.boxes = strip_non_chars(self.inverted, self.boxes, self.avgheight)
Exemplo n.º 4
0
    def calc_bounding_boxes(self):
        """Compute bounding boxes of the connected components.

        Labels a copy of ``self.inverted``, collects the component
        rectangles into ``self.boxes``, derives ``self.avgheight`` from
        the box heights and finally filters ``self.boxes`` through
        ``strip_non_chars``.
        """
        labelled = iulib.intarray()
        labelled.copy(self.inverted)
        iulib.label_components(labelled, False)
        rects = iulib.rectarray()
        iulib.bounding_boxes(rects, labelled)

        self.boxes = []
        for idx in range(rects.length()):
            # A box covering more than 95% of the image area is skipped.
            covers_page = rects.at(idx).area() > (
                self.inverted.dim(0) * self.inverted.dim(1) * 0.95)
            if not covers_page:
                self.boxes.append(i2r(rects.at(idx)))

        # Average text height: trimmed_mean over the sorted box heights.
        # NOTE(review): presumably the 5, 5 arguments trim a percentage
        # from each end -- confirm against trimmed_mean.
        heights = numpy.array([box.height() for box in self.boxes])
        self.avgheight = trimmed_mean(numpy.sort(heights), 5, 5)

        # Let strip_non_chars drop boxes that do not look like characters
        # relative to the average height.
        self.boxes = strip_non_chars(self.inverted, self.boxes, self.avgheight)