def _process_segment(self, model, dataset, page, page_xywh, page_id,
                     input_file, orig_img_size, n):
        """Run the dewarping model over ``dataset`` and attach the results.

        For every sample the model output is binarized at its mid-range
        value, resized to ``orig_img_size`` (unpacked as width, height),
        stored through the workspace in ``self.image_grp`` and registered
        on ``page`` as an AlternativeImage.

        ``page_xywh['features']`` gets ``',dewarped'`` appended once per
        sample, and the same file id is used for every sample.
        """
        for sample in dataset:
            target_width, target_height = orig_img_size
            prediction = model.inference(sample['label'], sample['inst'],
                                         sample['image'])
            # Move the first axis of the first output tensor to the end
            # before converting it to an array.
            pixels = array(
                prediction.data[0].permute(1, 2, 0).detach().cpu())
            threshold = ocrolib.midrange(pixels)
            binarized = array(255 * (pixels > threshold), 'B')
            result_image = ocrolib.array2pil(binarized)
            result_image = result_image.resize((target_width, target_height))

            page_xywh['features'] += ',dewarped'

            # Derive the output id from the input id; fall back to a padded
            # counter when the input id does not contain the input group.
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.image_grp)
            if file_id == input_file.ID:
                file_id = concat_padded(self.image_grp, n)

            saved_path = self.workspace.save_image_file(
                result_image,
                file_id,
                page_id=page_id,
                file_grp=self.image_grp,
                force=self.parameter['force'])
            alternative = AlternativeImageType(filename=saved_path,
                                               comments=page_xywh['features'])
            page.add_AlternativeImage(alternative)
    def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
        """Detect the page border, record it on ``page`` and save the crop.

        The border box is taken from the detected text area when one is
        usable, otherwise from ``self.select_borderLine``. The box is
        converted with ``coordinates_for_segment`` and stored as the page's
        ``Border``; the cropped image is saved in ``self.output_file_grp``
        and registered as an AlternativeImage with ``',cropped'`` appended
        to ``page_xywh['features']``.

        Args:
            page_image: PIL image of the page.
            page: PAGE object that receives the Border and AlternativeImage.
            page_xywh: dict whose ``'features'`` string is extended in place.
            page_id: page identifier passed on to the workspace.
            input_file: input file used to derive the output file id.
            n: unused here; kept for interface compatibility.
        """
        img_array = ocrolib.pil2array(page_image)

        # Replicate a single-channel image across three channels.
        # FIXME: check not needed anymore?
        if len(img_array.shape) == 2:
            img_array = np.stack((img_array,) * 3, axis=-1)

        img_array_bin = np.array(
            img_array > ocrolib.midrange(img_array), 'i')

        lineDetectH = []
        lineDetectV = []
        img_array_rr = self.remove_rular(img_array)

        textarea, img_array_rr_ta, height, width = self.detect_textarea(
            img_array_rr)
        # The parameter is a fraction of the page width.
        colSeparator = int(width * self.parameter['colSeparator'])

        # Decide whether a detected text area can serve as the border box.
        # The previous code also computed a padded box (x1..y2) in the
        # single-area case but never used it; that dead code is removed.
        use_borderline = True
        if len(textarea) > 1:
            textarea = self.crop_area(
                textarea, img_array_bin, img_array_rr_ta, colSeparator)
            use_borderline = len(textarea) == 0
        elif len(textarea) == 1:
            x1, y1, x2, y2 = textarea[0]
            # Accept a single area only if it covers more than half the page.
            use_borderline = not (
                height * width * 0.5 < abs(x2 - x1) * abs(y2 - y1))

        if use_borderline:
            min_x, min_y, max_x, max_y = self.select_borderLine(
                img_array_rr, lineDetectH, lineDetectV)
        else:
            min_x, min_y, max_x, max_y = textarea[0]

        border_polygon = [[min_x, min_y], [max_x, min_y],
                          [max_x, max_y], [min_x, max_y]]
        border_polygon = coordinates_for_segment(
            border_polygon, page_image, page_xywh)
        border_points = points_from_polygon(border_polygon)
        page.set_Border(BorderType(Coords=CoordsType(border_points)))

        page_image = crop_image(page_image, box=(min_x, min_y, max_x, max_y))
        page_xywh['features'] += ',cropped'

        file_id = make_file_id(input_file, self.output_file_grp)

        file_path = self.workspace.save_image_file(
            page_image,
            file_id + '-IMG',
            page_id=page_id,
            file_grp=self.output_file_grp)
        page.add_AlternativeImage(AlternativeImageType(
            filename=file_path, comments=page_xywh['features']))
# Example #3
# 0
# File: lineest.py  Project: beanyh/yeobaek
def blxlimages(image,shapedict,bls,xls):
    """Assemble two per-character prediction images for a line image.

    The line is binarized at its mid-range value, segmented with
    ``lineseg.ccslineseg`` and renumbered by x-center. For each extracted
    character the class predicted by ``shapedict.predict1`` selects one
    32x32 patch from ``bls`` and one from ``xls``; the patches are added
    into two images with the shape of ``image``.

    Returns:
        Tuple ``(blimage, xlimage)``.

    Raises:
        RecognitionError: if the binarized line contains no set pixel.
    """
    binary = image > ocrolib.midrange(image)
    if amax(binary) == 0:
        raise RecognitionError("empty line")
    labels = lineseg.ccslineseg(binary)
    labels = morph.renumber_by_xcenter(labels)
    blimage = zeros(binary.shape)
    xlimage = zeros(binary.shape)
    # The middle element (forward transform) is not needed here.
    for glyph, _transform, paste_back in extract_chars(labels):
        shape_class = shapedict.predict1(glyph)
        bl_patch = bls[shape_class].reshape(32, 32)
        xl_patch = xls[shape_class].reshape(32, 32)
        paste_back(blimage, bl_patch)
        paste_back(xlimage, xl_patch)
    return blimage, xlimage
# Example #4
# 0
def blxlimages(image, shapedict, bls, xls):
    """Return two images built from per-character 32x32 patches.

    ``image`` is thresholded at its mid-range value and segmented into
    characters. Every character is classified with ``shapedict.predict1``;
    the matching rows of ``bls`` and ``xls`` are reshaped to 32x32 and
    added into two initially zero images shaped like ``image``.

    Raises:
        RecognitionError: when the thresholded image has no foreground.
    """
    thresholded = (image > ocrolib.midrange(image))
    if amax(thresholded) == 0:
        raise RecognitionError("empty line")

    segmentation = morph.renumber_by_xcenter(lineseg.ccslineseg(thresholded))

    out_shape = thresholded.shape
    blimage = zeros(out_shape)
    xlimage = zeros(out_shape)

    for char_img, _unused, add_back in extract_chars(segmentation):
        best = shapedict.predict1(char_img)
        patches = (bls[best].reshape(32, 32), xls[best].reshape(32, 32))
        add_back(blimage, patches[0])
        add_back(xlimage, patches[1])

    return blimage, xlimage
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):
        """Separate text from non-text and save the text image.

        The page is converted to an inverted, max-normalized intensity
        array. A mask/seed fill produces ``Iseedfill``, which is dilated
        and expanded back to the page size. The text part is binarized at
        its mid-range value, saved in ``self.image_grp`` and registered on
        ``page`` as an AlternativeImage, with ``',tiseged'`` appended to
        ``page_xywh['features']``.
        """
        intensity = ocrolib.pil2array(page_image)
        if len(intensity.shape) > 2:
            # Average over the channel axis.
            intensity = np.mean(intensity, 2)
        intensity = 1 - intensity / intensity.max()
        n_rows, n_cols = intensity.shape

        # Mask and seed images, then their seed fill.
        mask_img, seed_img = self.pixMorphSequence_mask_seed_fill_holes(
            intensity)
        filled = self.pixSeedfillBinary(mask_img, seed_img)

        # Dilate with a 3x3 structuring element, then bring the result
        # back to the size of the intensity image.
        filled = ndimage.binary_dilation(filled, ones((3, 3)))
        filled = self.expansion(filled, (n_rows, n_cols))

        # Text and non-text images.
        # NOTE(review): image_part is not used after this point.
        image_part = array((1 - intensity * filled), dtype=int)
        image_part[0, 0] = 0  # only for visualisation purpose
        text_part = array((1 - intensity * (1 - filled)), dtype=int)
        text_part[0, 0] = 0  # only for visualisation purpose

        page_xywh['features'] += ',tiseged'

        text_threshold = ocrolib.midrange(text_part)
        bin_image = ocrolib.array2pil(
            array(255 * (text_part > text_threshold), 'B'))

        # Output id: input id with the group name swapped, or a padded
        # counter when the input id does not contain the input group.
        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        saved_path = self.workspace.save_image_file(
            bin_image,
            file_id,
            page_id=page_id,
            file_grp=self.image_grp)
        alternative = AlternativeImageType(filename=saved_path,
                                           comments=page_xywh['features'])
        page.add_AlternativeImage(alternative)
# Example #6
# 0
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):
        """Deskew and binarize a page image and record the result on ``page``.

        Steps, in order:

        1. If ``maxskew`` > 0, estimate a skew angle on the inverted image
           (ignoring a ``bignore`` margin) and rotate the image by it.
        2. Estimate low/high grey levels as the ``lo``/``hi`` percentiles,
           optionally restricted to high-variance areas (``escale`` > 0).
        3. Rescale to [0, 1] and threshold at ``threshold``.
        4. Store the angle via ``page.set_orientation``, append
           ``',deskewed'`` to ``page_xywh['features']``, save the binary
           image in ``self.image_grp`` and add it as an AlternativeImage.

        NOTE(review): ``flat.shape`` is unpacked into two values, so the
        input is assumed to be a single-channel image -- confirm at callers.
        """

        raw = ocrolib.pil2array(page_image)
        flat = raw.astype("float64")

        # estimate skew angle and rotate
        if self.parameter['maxskew'] > 0:
            if self.parameter['parallel'] < 2:
                LOG.info("Estimating Skew Angle")
            d0, d1 = flat.shape
            # margins (in pixels) excluded from the estimate
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            # invert and shift so that the minimum becomes 0
            flat = amax(flat) - flat
            flat -= amin(flat)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            ma = self.parameter['maxskew']
            # number of angle steps across [-maxskew, +maxskew]
            ms = int(2 * self.parameter['maxskew'] *
                     self.parameter['skewsteps'])
            angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
            flat = interpolation.rotate(flat,
                                        angle,
                                        mode='constant',
                                        reshape=0)
            # undo the inversion applied before the estimate
            flat = amax(flat) - flat
        else:
            angle = 0

        # self.write_angles_to_pageXML(base,angle)
        # estimate low and high thresholds
        if self.parameter['parallel'] < 2:
            LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            # local standard deviation via two Gaussian smoothings
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            # grow the selected mask vertically, then horizontally
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            # boolean indexing: est becomes the 1-D set of selected pixels
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

        # rescale the image to get the gray scale image
        if self.parameter['parallel'] < 2:
            LOG.info("Rescaling")
        flat -= lo
        # NOTE(review): no guard against hi == lo here
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        # integer 0/1 image
        deskewed = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        #LOG.info("%s lo-hi (%.2f %.2f) angle %4.1f" %(lo, hi, angle))

        #TODO: Need some clarification as the results effect the following pre-processing steps.
        #orientation = -angle
        #orientation = 180 - ((180 - orientation) % 360)

        if angle is None:  # FIXME: quick fix to prevent angle of "none"
            angle = 0

        page.set_orientation(angle)

        page_xywh['features'] += ',deskewed'
        bin_array = array(255 * (deskewed > ocrolib.midrange(deskewed)), 'B')
        page_image = ocrolib.array2pil(bin_array)

        # output id: input id with the group swapped, else a padded counter
        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)
        file_path = self.workspace.save_image_file(page_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features']))
    def process(self):
        """Detect the page border of every input file and write PAGE output.

        For each input file the page image is loaded, a border box is
        chosen (from the detected text area when usable, otherwise from
        ``self.select_borderLine``), stored as the page's ``Border`` and
        the updated PAGE document is added to ``self.output_file_grp``.

        ``self.parameter['colSeparator']`` is configured as a fraction of
        the page width. While a page is processed it is temporarily set to
        the absolute pixel value, as before. The fraction is restored
        afterwards, so it no longer compounds from page to page.
        """
        col_separator_ratio = self.parameter['colSeparator']
        try:
            for (n, input_file) in enumerate(self.input_files):
                pcgts = page_from_file(
                    self.workspace.download_file(input_file))
                fname = pcgts.get_Page().imageFilename
                img = self.workspace.resolve_image_as_pil(fname)
                print("Process file: ", fname)

                img_array = ocrolib.pil2array(img)
                img_array_bin = np.array(
                    img_array > ocrolib.midrange(img_array), 'i')

                lineDetectH = []
                lineDetectV = []
                img_array_rr = self.remove_rular(img_array)

                textarea, img_array_rr_ta, height, width = \
                    self.detect_textarea(img_array_rr)
                # Always scale from the configured ratio, never from the
                # value left behind by the previous page.
                self.parameter['colSeparator'] = int(
                    width * col_separator_ratio)

                # Decide whether a detected text area can be the border.
                # The unused padded box (x1..y2) of the old code is gone.
                use_borderline = True
                if len(textarea) > 1:
                    textarea = self.crop_area(textarea, img_array_bin,
                                              img_array_rr_ta)
                    use_borderline = len(textarea) == 0
                elif len(textarea) == 1:
                    x1, y1, x2, y2 = textarea[0]
                    # Accept it only if it covers more than half the page.
                    use_borderline = not (
                        height * width * 0.5 < abs(x2 - x1) * abs(y2 - y1))

                if use_borderline:
                    min_x, min_y, max_x, max_y = self.select_borderLine(
                        img_array_rr, lineDetectH, lineDetectV)
                else:
                    min_x, min_y, max_x, max_y = textarea[0]

                brd = BorderType(
                    Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                      (min_x, min_y, max_x, min_y,
                                       max_x, max_y, min_x, max_y)))
                pcgts.get_Page().set_Border(brd)

                # Use input_file's basename for the new file -
                # this way the files retain the same basenames:
                file_id = input_file.ID.replace(self.input_file_grp,
                                                self.output_file_grp)
                if file_id == input_file.ID:
                    file_id = concat_padded(self.output_file_grp, n)
                self.workspace.add_file(
                    ID=file_id,
                    file_grp=self.output_file_grp,
                    pageId=input_file.pageId,
                    mimetype=MIMETYPE_PAGE,
                    local_filename=os.path.join(self.output_file_grp,
                                                file_id + '.xml'),
                    content=to_xml(pcgts).encode('utf-8'))
        finally:
            # Leave the parameter as configured.
            self.parameter['colSeparator'] = col_separator_ratio
# Example #8
# 0
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n, model):
        """Split a page into text and non-text images and save both.

        With a ``model`` the split comes from ``model.predict_segmentation``:
        pixels labelled 1 are zeroed in the text image, pixels labelled 2
        are zeroed in the non-text image, and both are resized to the page
        size. Without a model a morphological mask/seed fill is used.

        Both images are saved in ``self.image_grp`` (suffixes ``_img`` and
        ``_txt``) and registered on ``page`` as AlternativeImages. The
        non-text image carries the extra ``',non_text'`` comment; then
        ``',clipped'`` is appended to ``page_xywh['features']`` for the
        text image.

        Fixes in the fallback branch: the undefined name ``img_part`` is
        replaced, and each part is thresholded against its own mid-range
        value before conversion to PIL. Previously the already converted
        text image was compared against a number.
        """
        I = ocrolib.pil2array(page_image)
        LOG.info('image size: %s', page_image.size)

        if model:
            if len(I.shape) < 3:
                print('Wrong input shape. Image should have 3 channel')

            # get prediction
            # NOTE(review): "/tmp/out.png" and 'out_image.png' look like
            # debug artefacts written on every call -- confirm they are
            # still wanted.
            out = model.predict_segmentation(inp=I, out_fname="/tmp/out.png")
            cv2.imwrite('out_image.png', out * (255 / 2))

            text_part = np.ones(out.shape)
            text_part[np.where(out == 1)] = 0

            image_part = np.ones(out.shape)
            image_part[np.where(out == 2)] = 0

            image_part = ocrolib.array2pil(array(255 * (image_part), 'B'))
            text_part = ocrolib.array2pil(array(255 * (text_part), 'B'))

            text_part = text_part.resize(page_image.size, Image.BICUBIC)
            image_part = image_part.resize(page_image.size, Image.BICUBIC)
        else:
            if len(I.shape) > 2:
                I = np.mean(I, 2)
            I = 1 - I / I.max()
            rows, cols = I.shape

            # Generate Mask and Seed Images
            Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

            # Iseedfill: Union of Mask and Seed Images
            Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

            # Dilation of Iseedfill
            Iseedfill = ndimage.binary_dilation(Iseedfill, ones((3, 3)))

            # Expansion of Iseedfill to become equal in size of I
            Iseedfill = self.expansion(Iseedfill, (rows, cols))

            # Text and non-text arrays, each binarized against its own
            # mid-range value while still a numpy array.
            image_array = array((1 - I * Iseedfill), dtype=int)
            text_array = array((1 - I * (1 - Iseedfill)), dtype=int)

            text_part = ocrolib.array2pil(array(
                255 * (text_array > ocrolib.midrange(text_array)), 'B'))
            image_part = ocrolib.array2pil(array(
                255 * (image_array > ocrolib.midrange(image_array)), 'B'))

        # Both outputs share one base id: the input id with the group
        # swapped, or a padded counter when that changes nothing.
        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        file_path = self.workspace.save_image_file(
            image_part,
            file_id + "_img",
            page_id=page_id,
            file_grp=self.image_grp,
            force=self.parameter['force'])
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features'] + ',non_text'))

        page_xywh['features'] += ',clipped'
        file_path = self.workspace.save_image_file(
            text_part,
            file_id + "_txt",
            page_id=page_id,
            file_grp=self.image_grp,
            force=self.parameter['force'])
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features']))
# Example #9
# 0
    def _process_segment(self, page, filename, page_id, file_id):
        """Binarize the image at ``filename`` and attach it to ``page``.

        Steps: normalize to [0, 1]; optionally sanity-check the page;
        flatten by subtracting an estimated local white level unless the
        image is already nearly binary; estimate low/high grey levels by
        percentile; rescale, threshold, save the result in
        ``self.image_grp`` and add it as an AlternativeImage.

        Returns early, adding nothing, when the image is constant or when
        ``self.check_page`` reports a problem.

        Fixes: the check-failed message used ``input_file``, which is not
        a parameter of this method, and mis-grouped ``or``/``+``. It now
        reports ``page_id``. ``AlternativeImageType`` is called with
        ``comments=``, the keyword used everywhere else in this file.

        Args:
            page: PAGE object that receives the AlternativeImage.
            filename: path read with ``ocrolib.read_image_gray``.
            page_id: page identifier used for logging and saving.
            file_id: id under which the binarized image is saved.
        """
        raw = ocrolib.read_image_gray(filename)
        self.dshow(raw, "input")

        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            LOG.info("# image is empty: %s" % (page_id))
            return
        image /= amax(image)

        if not self.parameter['nocheck']:
            check = self.check_page(amax(image) - image)
            if check is not None:
                LOG.error("%s SKIPPED. %s (use -n to disable this check)",
                          page_id, check)
                return

        # check whether the image is already effectively binarized
        if self.parameter['gray']:
            extreme = 0
        else:
            # fraction of pixels that are almost black or almost white
            extreme = (np.sum(image < 0.05) +
                       np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local whitelevel
            LOG.info("Flattening")
            m = interpolation.zoom(image, self.parameter['zoom'])
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(self.parameter['range'], 2))
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(2, self.parameter['range']))
            m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
            if self.parameter['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            # zooming down and up again may change the size slightly;
            # work on the common area only
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if self.parameter['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])

        # estimate low and high thresholds
        LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
        # rescale the image to get the gray scale image
        LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        binarized = 1 * (flat > self.parameter['threshold'])

        # output the thresholded image
        LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
        LOG.info("writing")
        if self.parameter['debug'] > 0 or self.parameter['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, self.parameter['debug']))

        bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
        bin_image = ocrolib.array2pil(bin_array)

        file_path = self.workspace.save_image_file(bin_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path, comments="binarized"))
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):
        """Detect the page border, record it on ``page`` and save the crop.

        The border box is taken from the detected text area when usable,
        otherwise from ``self.select_borderLine``. It is stored as the
        page's ``Border``; the cropped image is saved in ``self.image_grp``
        and added as an AlternativeImage with ``',cropped'`` appended to
        ``page_xywh['features']``.

        ``self.parameter['colSeparator']`` is configured as a fraction of
        the page width. It is set to the absolute pixel value only while
        the border is being determined and restored afterwards. Previously
        the scaled value was left behind, so it compounded on each call.
        """
        # Get image orientation
        # orientation = page.get_orientation() # This function is not working

        img_array = ocrolib.pil2array(page_image)

        # Replicate a single-channel image across three channels.
        # FIXME: check not needed anymore?
        if len(img_array.shape) == 2:
            img_array = np.stack((img_array, ) * 3, axis=-1)

        img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

        lineDetectH = []
        lineDetectV = []
        img_array_rr = self.remove_rular(img_array)

        textarea, img_array_rr_ta, height, width = self.detect_textarea(
            img_array_rr)

        col_separator_ratio = self.parameter['colSeparator']
        self.parameter['colSeparator'] = int(width * col_separator_ratio)
        try:
            # Decide whether a detected text area can be the border box.
            # The unused padded box (x1..y2) of the old code is removed.
            use_borderline = True
            if len(textarea) > 1:
                textarea = self.crop_area(textarea, img_array_bin,
                                          img_array_rr_ta)
                use_borderline = len(textarea) == 0
            elif len(textarea) == 1:
                x1, y1, x2, y2 = textarea[0]
                # Accept it only if it covers more than half the page.
                use_borderline = not (
                    height * width * 0.5 < abs(x2 - x1) * abs(y2 - y1))

            if use_borderline:
                min_x, min_y, max_x, max_y = self.select_borderLine(
                    img_array_rr, lineDetectH, lineDetectV)
            else:
                min_x, min_y, max_x, max_y = textarea[0]
        finally:
            # Leave the parameter as configured for the next call.
            self.parameter['colSeparator'] = col_separator_ratio

        brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                           (min_x, min_y, max_x, min_y, max_x,
                                            max_y, min_x, max_y)))
        page.set_Border(brd)

        page_image = crop_image(page_image, box=(min_x, min_y, max_x, max_y))
        page_xywh['features'] += ',cropped'

        # Output id: input id with the group swapped, else a padded counter.
        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        file_path = self.workspace.save_image_file(page_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features']))
    def _process_segment(self, page_image, page, textregion, region_xywh,
                         page_id, input_file, n):
        """Segment a text region image into lines and add them as TextLines.

        Existing TextLines are removed when the ``overwrite`` parameter is
        set; otherwise the region is left untouched. The region image is
        inverted and normalized, a scale is estimated (or taken from the
        ``scale`` parameter), and the image is segmented into lines in
        reading order. Every line image is cut from the noise-cleaned
        image, binarized, inverted, saved in ``self.output_file_grp`` and
        attached to a new ``TextLineType`` added to ``textregion``.

        Returns early without adding lines when the scale is implausible
        or when more than ``maxlines`` lines are found.
        """
        LOG = getLogger('OcrdAnybaseocrTextline')

        # check for existing text lines and whether to overwrite them
        if textregion.get_TextLine():
            if not self.parameter['overwrite']:
                LOG.warning('keeping existing TextLines in region "%s"',
                            page_id)
                return
            LOG.info('removing existing TextLines in region "%s"', page_id)
            textregion.set_TextLine([])

        binary = ocrolib.pil2array(page_image)
        if len(binary.shape) > 2:
            # average over the channel axis
            binary = np.mean(binary, 2)
        binary = np.array(1 - binary / np.amax(binary), 'B')

        # a configured scale of 0 means "estimate it from the image"
        scale = self.parameter['scale']
        if scale == 0:
            scale = psegutils.estimate_scale(binary)

        scale_is_bad = (np.isnan(scale) or scale > 1000.0
                        or scale < self.parameter['minscale'])
        if scale_is_bad:
            LOG.warning(str(scale) + ": bad scale; skipping!\n")
            return

        segmentation = self.compute_segmentation(binary, scale)
        highest_label = np.amax(segmentation)
        if highest_label > self.parameter['maxlines']:
            LOG.warning("too many lines %i; skipping!\n", (highest_label))
            return

        lines = psegutils.compute_lines(segmentation, scale)
        reading_order = psegutils.reading_order([ln.bounds for ln in lines])
        sorted_indices = psegutils.topsort(reading_order)

        # renumber the labels so that they conform to the specs
        relabel = np.zeros(highest_label + 1, 'i')
        for position, line_index in enumerate(sorted_indices):
            relabel[lines[line_index].label] = 0x010000 + (position + 1)
        segmentation = relabel[segmentation]

        lines = [lines[line_index] for line_index in sorted_indices]
        cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])

        for index, text_line in enumerate(lines):
            first_axis, second_axis = text_line.bounds[0], text_line.bounds[1]
            # NOTE(review): bounds[0] indexes the first array axis below,
            # yet it is used as the x range here -- confirm the intended
            # axis order of the polygon.
            min_x, max_x = (first_axis.start, first_axis.stop)
            min_y, max_y = (second_axis.start, second_axis.stop)

            corners = [[min_x, min_y], [max_x, min_y], [max_x, max_y],
                       [min_x, max_y]]
            corners = coordinates_for_segment(corners, page_image,
                                              region_xywh)
            line_points = points_from_polygon(corners)

            # cut, binarize and invert the line image
            crop = cleaned[first_axis, second_axis]
            crop = np.array(255 * (crop > ocrolib.midrange(crop)), 'B')
            crop = 255 - crop
            line_image = ocrolib.array2pil(crop)

            file_id = make_file_id(input_file, self.output_file_grp)
            file_path = self.workspace.save_image_file(
                line_image,
                file_id + "_" + str(n) + "_" + str(index),
                page_id=page_id,
                file_grp=self.output_file_grp)
            alternative = AlternativeImageType(
                filename=file_path, comments=region_xywh['features'])

            line_id = '%s_line%04d' % (page_id, index)
            line = TextLineType(
                custom='readingOrder {index:' + str(index) + ';}',
                id=line_id,
                Coords=CoordsType(line_points))
            line.add_AlternativeImage(alternative)
            textregion.add_TextLine(line)
# Example #12
# 0
    def _process_segment(self, page_image, page, region_xywh, page_id,
                         input_file, n):
        """Segment a page image into text lines and add them to a region.

        The lines are added to the last TextRegion of ``page``; when the
        page has none, a region spanning the whole image is created first.
        Each line image is binarized, saved in ``self.image_grp`` and
        attached to a new ``TextLineType`` with rectangular coordinates.
        ``',textline'`` is appended to ``region_xywh['features']``.

        Returns early without adding lines when the scale is implausible
        or when more than ``maxlines`` lines are found.

        Fixes: both warning paths referenced the undefined name ``fname``
        and now report ``page_id``. The "too many lines" warning passed
        its values as one tuple; they are now separate logging arguments.
        """
        binary = ocrolib.pil2array(page_image)
        binary = np.array(1 - binary / np.amax(binary), 'B')
        if page.get_TextRegion() is None or len(page.get_TextRegion()) < 1:
            # NOTE(review): shape[0] is used as the x extent here --
            # confirm the intended axis order.
            min_x, max_x = (0, binary.shape[0])
            min_y, max_y = (0, binary.shape[1])
            textregion = TextRegionType(
                Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                  (min_x, min_y, max_x, min_y, max_x, max_y,
                                   min_x, max_y)))
            page.add_TextRegion(textregion)
        else:
            textregion = page.get_TextRegion()[-1]
        # NOTE(review): looks like a debug dump into the working
        # directory; kept to preserve existing side effects.
        ocrolib.write_image_binary("test.bin.png", binary)
        if self.parameter['scale'] == 0:
            scale = psegutils.estimate_scale(binary)
        else:
            scale = self.parameter['scale']
        if np.isnan(
                scale) or scale > 1000.0 or scale < self.parameter['minscale']:
            LOG.warning("%s: bad scale (%g); skipping\n", page_id, scale)
            return

        segmentation = self.compute_segmentation(binary, scale)
        if np.amax(segmentation) > self.parameter['maxlines']:
            LOG.warning("%s: too many lines %i",
                        page_id, np.amax(segmentation))
            return
        lines = psegutils.compute_lines(segmentation, scale)
        order = psegutils.reading_order([l.bounds for l in lines])
        lsort = psegutils.topsort(order)

        # renumber the labels so that they conform to the specs

        nlabels = np.amax(segmentation) + 1
        renumber = np.zeros(nlabels, 'i')
        for i, v in enumerate(lsort):
            renumber[lines[v].label] = 0x010000 + (i + 1)
        segmentation = renumber[segmentation]

        lines = [lines[i] for i in lsort]
        # NOTE(review): `cleaned` is computed but the line images below
        # are cut from `binary` -- confirm which one is intended.
        cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])
        region_xywh['features'] += ",textline"

        # The output base id does not depend on the line, so compute it
        # once: input id with the group swapped, else a padded counter.
        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        for i, l in enumerate(lines):
            ocrolib.write_image_binary("test.bin.png", binary[l.bounds[0],
                                                              l.bounds[1]])
            min_x, max_x = (l.bounds[0].start, l.bounds[0].stop)
            min_y, max_y = (l.bounds[1].start, l.bounds[1].stop)

            img = binary[l.bounds[0], l.bounds[1]]
            img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
            img = ocrolib.array2pil(img)

            file_path = self.workspace.save_image_file(img,
                                                       file_id + "_" + str(i),
                                                       page_id=page_id,
                                                       file_grp=self.image_grp)
            ai = AlternativeImageType(filename=file_path,
                                      comments=region_xywh['features'])
            line = TextLineType(
                Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                  (min_x, min_y, max_x, min_y, max_x, max_y,
                                   min_x, max_y)))
            line.add_AlternativeImage(ai)
            textregion.add_TextLine(line)
# Example #13
0
    def process(self):
        """Compute a page border for every input file and store it in PAGE-XML.

        For each input file the page image is fetched from the workspace,
        rotated via ``self.rotate_image`` using the page's orientation,
        passed through ``self.remove_rular`` and ``self.detect_textarea``,
        and the selected bounding box is written as the page ``Border``.
        The updated PAGE document is added to ``self.output_file_grp``.
        """
        # ``colSeparator`` is scaled by the page width below and the scaled
        # value is written back into ``self.parameter`` (presumably for the
        # helper methods, which are not visible here -- TODO confirm).  The
        # configured value is remembered so that every page is scaled from
        # it, instead of re-scaling the previous page's pixel value.
        col_separator_ratio = self.parameter['colSeparator']
        try:
            for n, input_file in enumerate(self.input_files):
                pcgts = page_from_file(
                    self.workspace.download_file(input_file))
                page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
                page = pcgts.get_Page()
                page_image, _, _ = self.workspace.image_from_page(
                    page, page_id)
                # Not every image object carries a ``filename`` attribute,
                # so look it up defensively instead of printing it.
                LOG.debug("page image %s (%s)", type(page_image),
                          getattr(page_image, 'filename', None))

                # Get image orientation
                orientation = page.get_orientation()
                rotated_image = self.rotate_image(orientation, page_image)

                LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID)

                img_array = ocrolib.pil2array(rotated_image)

                # Replicate a single-channel image into three channels.
                if len(img_array.shape) == 2:
                    img_array = np.stack((img_array, ) * 3, axis=-1)

                img_array_bin = np.array(
                    img_array > ocrolib.midrange(img_array), 'i')

                lineDetectH = []
                lineDetectV = []
                img_array_rr = self.remove_rular(img_array)

                textarea, img_array_rr_ta, height, width = \
                    self.detect_textarea(img_array_rr)
                self.parameter['colSeparator'] = int(
                    width * col_separator_ratio)

                if len(textarea) > 1:
                    textarea = self.crop_area(textarea, img_array_bin,
                                              img_array_rr_ta)

                    if len(textarea) == 0:
                        min_x, min_y, max_x, max_y = self.select_borderLine(
                            img_array_rr, lineDetectH, lineDetectV)
                    else:
                        min_x, min_y, max_x, max_y = textarea[0]
                elif len(textarea) == 1 and (
                        height * width * 0.5 <
                        abs(textarea[0][2] - textarea[0][0]) *
                        abs(textarea[0][3] - textarea[0][1])):
                    # A single text area covering more than half of the
                    # page is used as the border as-is (unpadded).
                    min_x, min_y, max_x, max_y = textarea[0]
                else:
                    min_x, min_y, max_x, max_y = self.select_borderLine(
                        img_array_rr, lineDetectH, lineDetectV)

                brd = BorderType(
                    Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                      (min_x, min_y, max_x, min_y,
                                       max_x, max_y, min_x, max_y)))
                page.set_Border(brd)

                # Use input_file's basename for the new file -
                # this way the files retain the same basenames:
                file_id = input_file.ID.replace(self.input_file_grp,
                                                self.output_file_grp)
                if file_id == input_file.ID:
                    file_id = concat_padded(self.output_file_grp, n)
                self.workspace.add_file(
                    ID=file_id,
                    file_grp=self.output_file_grp,
                    pageId=input_file.pageId,
                    mimetype=MIMETYPE_PAGE,
                    local_filename=os.path.join(self.output_file_grp,
                                                file_id + '.xml'),
                    content=to_xml(pcgts).encode('utf-8'))
        finally:
            # Leave the configured parameter untouched for later runs.
            self.parameter['colSeparator'] = col_separator_ratio
# Example #14
0
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):
        """Binarize ``page_image`` and register the result on ``page``.

        The image is converted to a float array normalized to [0, 1],
        optionally flattened by subtracting a percentile-filtered estimate,
        rescaled between two percentile scores and thresholded.  The result
        is saved through the workspace into ``self.output_file_grp`` and
        added to ``page`` as an ``AlternativeImage``; ``',binarized'`` is
        appended to ``page_xywh['features']``.

        Returns ``None`` early (after logging) when the normalized image has
        no value range.  ``n`` is not used by this method.
        """
        LOG = getLogger('OcrdAnybaseocrBinarizer')
        params = self.parameter
        debug = params['debug']

        def debug_show(arr, clear=False, **imshow_kwargs):
            # Interactive display, active only when the debug parameter is > 0.
            if debug > 0:
                if clear:
                    clf()
                imshow(arr, **imshow_kwargs)
                ginput(1, debug)

        pixels = ocrolib.pil2array(page_image)
        if len(pixels.shape) > 2:
            # average over the channel axis
            pixels = np.mean(pixels, 2)
        pixels = pixels.astype("float64")

        # perform image normalization
        image = pixels - amin(pixels)
        if amax(image) == amin(image):
            LOG.info("# image is empty: %s", page_id)
            return
        image /= amax(image)

        # check whether the image is already effectively binarized
        if params['gray']:
            extreme = 0
        else:
            near_black = np.sum(image < 0.05)
            near_white = np.sum(image > 0.95)
            extreme = (near_black + near_white) * 1.0 / np.prod(image.shape)

        if extreme > 0.95:
            comment = "no-normalization"
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local whitelevel
            LOG.info("Flattening")
            zoom_factor = params['zoom']
            percentile = params['perc']
            window = params['range']
            whitelevel = interpolation.zoom(image, zoom_factor)
            for footprint in ((window, 2), (2, window)):
                whitelevel = filters.percentile_filter(whitelevel,
                                                       percentile,
                                                       size=footprint)
            whitelevel = interpolation.zoom(whitelevel, 1.0 / zoom_factor)
            debug_show(whitelevel, clear=True, vmin=0, vmax=1)
            # crop both arrays to their common extent before subtracting
            w, h = minimum(array(image.shape), array(whitelevel.shape))
            flat = clip(image[:w, :h] - whitelevel[:w, :h] + 1, 0, 1)
            debug_show(flat, clear=True, vmin=0, vmax=1)

        # estimate low and high thresholds
        LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        o0 = int(params['bignore'] * d0)
        o1 = int(params['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        escale = params['escale']
        if escale > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            sigma = escale * 20.0
            span = int(escale * 50)
            variance = est - filters.gaussian_filter(est, sigma)
            variance = filters.gaussian_filter(variance**2, sigma)**0.5
            variance = (variance > 0.3 * amax(variance))
            variance = morphology.binary_dilation(
                variance, structure=ones((span, 1)))
            variance = morphology.binary_dilation(
                variance, structure=ones((1, span)))
            debug_show(variance)
            est = est[variance]
        samples = est.ravel()
        lo = stats.scoreatpercentile(samples, params['lo'])
        hi = stats.scoreatpercentile(samples, params['hi'])

        # rescale the image to get the gray scale image
        LOG.info("Rescaling")
        flat -= lo
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        debug_show(flat, vmin=0, vmax=1)
        binarized = 1 * (flat > params['threshold'])

        # output the thresholded image
        LOG.info("%s lo-hi (%.2f %.2f) %s", page_id, lo, hi, comment)
        LOG.info("writing")
        if debug > 0 or params['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, debug))

        page_xywh['features'] += ',binarized'

        above_mid = binarized > ocrolib.midrange(binarized)
        bin_image = ocrolib.array2pil(array(255 * above_mid, 'B'))

        file_id = make_file_id(input_file, self.output_file_grp)
        file_path = self.workspace.save_image_file(
            bin_image,
            file_id + '-IMG',
            page_id=page_id,
            file_grp=self.output_file_grp)
        alternative = AlternativeImageType(filename=file_path,
                                           comments=page_xywh['features'])
        page.add_AlternativeImage(alternative)
# Example #15
0
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n, model):
        """Split ``page_image`` into a text image and a non-text image.

        With a truthy ``model`` the page is resized to 800x1024, passed to
        ``model.predict`` and the per-pixel argmax is used to blank class 1
        in the text image and class 2 in the non-text image.  Without a
        model, a morphological mask/seed fill (helper methods of this class)
        decides which pixels belong to which part.

        Both results are saved through the workspace into
        ``self.output_file_grp`` (suffixes ``_img`` and ``_txt``) and added
        to ``page`` as ``AlternativeImage`` entries.  ``',clipped'`` is
        appended to ``page_xywh['features']``.  ``n`` is not used here.
        """
        LOG = getLogger('OcrdAnybaseocrTiseg')

        if model:
            # Image.LANCZOS is the same filter formerly named ANTIALIAS
            # (that alias was removed in Pillow 10).
            resized = page_image.resize((800, 1024), Image.LANCZOS)
            pixels = np.array(ocrolib.pil2array(resized))
            if pixels.ndim == 2:
                # The shape check has to happen before the batch axis is
                # added; a single-channel image is replicated into three
                # channels, as done for other processors in this file.
                LOG.warning('Wrong input shape. Image should have 3 channel')
                pixels = np.stack((pixels, ) * 3, axis=-1)
            batch = pixels[np.newaxis, :, :, :]
            LOG.info('I shape %s', batch.shape)

            # get prediction
            out = model.predict(batch)
            out = out.reshape((2048, 1600, 3)).argmax(axis=2)

            text_part = np.ones(out.shape)
            text_part[out == 1] = 0

            image_part = np.ones(out.shape)
            image_part[out == 2] = 0

            image_part = ocrolib.array2pil(array(255 * image_part, 'B'))
            text_part = ocrolib.array2pil(array(255 * text_part, 'B'))

            # scale the masks back to the size of the input page image
            text_part = text_part.resize(page_image.size, Image.BICUBIC)
            image_part = image_part.resize(page_image.size, Image.BICUBIC)

        else:
            gray_img = ocrolib.pil2array(page_image)

            if len(gray_img.shape) > 2:
                gray_img = np.mean(gray_img, 2)
            inverted = 1 - gray_img / gray_img.max()
            rows, cols = inverted.shape

            # Generate Mask and Seed Images
            Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(
                inverted)

            # Iseedfill: Union of Mask and Seed Images
            Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

            # Dilation of Iseedfill
            mask = ones((3, 3))
            Iseedfill = ndimage.binary_dilation(Iseedfill, mask)

            # Expansion of Iseedfill to become equal in size of I
            Iseedfill = self.expansion(Iseedfill, (rows, cols))

            # Write Text and Non-Text images
            image_part = array((1 - inverted * Iseedfill), dtype=int)
            text_part = array((1 - inverted * (1 - Iseedfill)), dtype=int)

            # Threshold each part against its own midrange *before* either
            # one is converted to a PIL image.
            image_bin = array(
                255 * (image_part > ocrolib.midrange(image_part)), 'B')
            text_bin = array(
                255 * (text_part > ocrolib.midrange(text_part)), 'B')
            image_part = ocrolib.array2pil(image_bin)
            text_part = ocrolib.array2pil(text_bin)

        file_id = make_file_id(input_file, self.output_file_grp)
        file_path = self.workspace.save_image_file(
            image_part,
            file_id + "_img",
            page_id=page_id,
            file_grp=self.output_file_grp,
        )
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features'] + ',non_text'))

        page_xywh['features'] += ',clipped'
        file_path = self.workspace.save_image_file(
            text_part,
            file_id + "_txt",
            page_id=page_id,
            file_grp=self.output_file_grp,
        )
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features']))