Exemplo n.º 1
0
 def test_raw_prediction(self):
     """Check raw prediction with a single checkpoint.

     Every result produced by ``predict_raw`` must report an average
     character probability greater than zero.
     """
     attrs = PredictionAttrs()
     params = PredictorParams(progress_bar=False, silent=True)
     predictor = Predictor.from_checkpoint(params,
                                           checkpoint=attrs.checkpoint[0])
     # Load every input image up front, then predict them in one call.
     images = list(map(load_image, attrs.files))
     for result in predictor.predict_raw(images):
         self.assertGreater(result.outputs.avg_char_probability, 0)
Exemplo n.º 2
0
 def test_raw_prediction_voted(self):
     """Run raw prediction with several checkpoints and print the results.

     For each sample the sentences of the individual predictions are
     printed as a list, followed by the sentence of the voted result.
     """
     attrs = PredictionAttrs()
     checkpoints = attrs.checkpoint
     params = PredictorParams(progress_bar=False, silent=True)
     predictor = MultiPredictor.from_paths(checkpoints=checkpoints,
                                           predictor_params=params)
     images = list(map(load_image, attrs.files))
     for sample in predictor.predict_raw(images):
         # ``outputs`` unpacks into (individual predictions, voted prediction).
         individual, voted = sample.outputs
         sentences = [single.sentence for single in individual]
         print(sentences)
         print(voted.sentence)
Exemplo n.º 3
0
 def test_raw_prediction(self):
     """Predict each file on its own and print its path and sentence."""
     attrs = PredictionAttrs()
     params = PredictorParams(progress_bar=False, silent=True)
     predictor = Predictor.from_checkpoint(params,
                                           checkpoint=attrs.checkpoint[0])
     # All images are loaded before the first prediction is requested.
     images = list(map(load_image, attrs.files))
     for path, image in zip(attrs.files, images):
         # One image per call; the single result unpacks into three values,
         # of which only the middle one (the prediction) is used.
         results = list(predictor.predict_raw([image]))
         _, prediction, _ = results[0]
         print(path, prediction.sentence)
Exemplo n.º 4
0
    def _load_line(self, image_path):
        if image_path is None:
            return None

        if not os.path.exists(image_path):
            if self._non_existing_as_empty:
                return np.zeros((1, 1), dtype=np.uint8)
            else:
                raise Exception(
                    "Image file at '{}' does not exist".format(image_path))

        try:
            img = load_image(image_path)
        except:
            return None

        return img
Exemplo n.º 5
0
    def _generate_epoch(self, text_only) -> Generator[InputSample, None, None]:
        """Yield one ``InputSample`` per format of every line of every page.

        The page image is loaded only when ``self.mode`` is in
        ``INPUT_PROCESSOR`` (and thresholded at 0.9 when ``self.binary`` is
        set); the text is taken from the format only when ``self.mode`` is in
        ``TARGETS_PROCESSOR``.

        Args:
            text_only: If truthy, samples are yielded without an image.

        Yields:
            ``InputSample(image, text, meta)`` where ``meta`` carries an id
            built from the page file name and the page/line/format indices,
            and a ``fold_id`` that counts all formats consecutively from 0.
        """
        fold_id = -1
        for page_idx, page in enumerate(self.book.pages):
            if self.mode in INPUT_PROCESSOR:
                img = load_image(page.imgFile)
                if self.binary:
                    img = img > 0.9
            else:
                img = None

            for line_idx, line in enumerate(page.getLines()):
                for format_idx, fo in enumerate(line.formats):
                    fold_id += 1
                    base_name = os.path.splitext(
                        page.xmlFile or page.imgFile)[0]
                    sample_id = "{}_{}_{}_{}".format(
                        base_name, page_idx, line_idx, format_idx)
                    meta = SampleMeta(id=sample_id, fold_id=fold_id)

                    text = fo.text if self.mode in TARGETS_PROCESSOR else None

                    if text_only:
                        yield InputSample(None, text, meta)
                        continue

                    cut_img = None
                    if self.mode in INPUT_PROCESSOR:
                        rect = line.rect

                        # Cut the image with absolute end coordinates. End
                        # offsets relative to the image size collapse to 0
                        # (an empty crop) as soon as the rectangle touches
                        # the bottom or right edge of the page.
                        cut_img = img[rect.top:rect.bottom,
                                      rect.left:rect.right]

                        # add padding as required from normal files
                        cut_img = np.pad(cut_img, ((3, 3), (0, 0)),
                                         mode='constant',
                                         constant_values=cut_img.max())

                    yield InputSample(cut_img, text, meta)
Exemplo n.º 6
0
    def _load_sample(self, sample, text_only):
        """Yield ``(line_img, text)`` for every line the loader returns.

        Args:
            sample: ``(image_path, xml_path)`` pair of one page.
            text_only: If truthy, no line images are cut and ``line_img`` is
                ``None``.

        Yields:
            ``(line_img, text)`` tuples. ``line_img`` is ``None`` when
            ``text_only`` is set or ``self.mode`` is not one of PREDICT,
            TRAIN or PRED_AND_EVAL.
        """
        loader = PageXMLDatasetLoader(self.mode, self._non_existing_as_empty,
                                      self.text_index, self.skip_invalid)
        image_path, xml_path = sample

        needs_image = self.mode in (DataSetMode.PREDICT, DataSetMode.TRAIN,
                                    DataSetMode.PRED_AND_EVAL)
        img = load_image(image_path) if needs_image else None

        # A separate loop name keeps the ``sample`` parameter from being shadowed.
        for line in loader.load(image_path, xml_path):
            text = line["text"]
            orientation = line["orientation"]

            if not text_only and needs_image:
                _, lx = img.shape[:2]

                line_img = PageXMLDataset.cutout(img, line['coords'],
                                                 lx / line['img_width'])

                # rotate by orientation angle in clockwise direction to correct present skew
                # (skimage rotates in counter-clockwise direction)
                if orientation and orientation % 360 != 0:
                    line_img = rotate(line_img,
                                      orientation * -1,
                                      resize=True,
                                      mode='constant',
                                      cval=line_img.max(),
                                      preserve_range=True).astype(np.uint8)

                # add padding as required from normal files.
                # The padding belongs on the cut-out line: padding the page
                # image would leave the yielded line unpadded and grow the
                # page on every iteration, shifting all later cut-outs.
                pad = self.args.get('pad', None)
                if pad:
                    line_img = np.pad(line_img,
                                      pad,
                                      mode='constant',
                                      constant_values=line_img.max())
            else:
                line_img = None

            yield line_img, text
0
    def _load_sample(self, sample,
                     text_only) -> Generator[InputSample, None, None]:
        """Yield one ``InputSample`` for every line the loader returns.

        Args:
            sample: ``(image_path, xml_path)`` pair of one page.
            text_only: If truthy, no line images are cut and the sample image
                is ``None``.

        Yields:
            ``InputSample(line_img, text, SampleMeta(id=...))``. ``line_img``
            is ``None`` when ``text_only`` is set or ``self.mode`` is not in
            ``INPUT_PROCESSOR``.
        """
        loader = PageXMLDatasetLoader(self.mode, self._non_existing_as_empty,
                                      self.text_index, self.skip_invalid)
        image_path, xml_path = sample

        needs_image = self.mode in INPUT_PROCESSOR
        img = load_image(image_path) if needs_image else None

        # A separate loop name keeps the ``sample`` parameter from being shadowed.
        for line in loader.load(image_path, xml_path):
            text = line["text"]
            orientation = line["orientation"]

            if not text_only and needs_image:
                _, lx = img.shape[:2]

                line_img = PageXMLReader.cutout(img, line['coords'],
                                                lx / line['img_width'])

                # rotate by orientation angle in clockwise direction to correct present skew
                # (skimage rotates in counter-clockwise direction)
                if orientation and orientation % 360 != 0:
                    line_img = rotate(line_img,
                                      orientation * -1,
                                      resize=True,
                                      mode='constant',
                                      cval=line_img.max(),
                                      preserve_range=True).astype(np.uint8)

                # add padding as required from normal files.
                # The padding belongs on the cut-out line: padding the page
                # image would leave the yielded line unpadded and grow the
                # page on every iteration, shifting all later cut-outs.
                pad = self.args.pad
                if pad:
                    line_img = np.pad(line_img,
                                      pad,
                                      mode='constant',
                                      constant_values=line_img.max())
            else:
                line_img = None

            yield InputSample(line_img, text, SampleMeta(id=line['id']))
Exemplo n.º 8
0
    def _load_sample(self, sample,
                     text_only) -> Generator[InputSample, None, None]:
        """Yield one ``InputSample`` for every line the loader returns.

        Args:
            sample: ``(image_path, xml_path, idx)`` triple of one page; ``idx``
                is the offset added to the line index to derive the fold.
            text_only: If truthy, no line images are cut and the sample image
                is ``None``.

        Yields:
            ``InputSample(line_img, text, SampleMeta(id=..., fold_id=...))``.
            ``fold_id`` is ``(idx + line index) % self.n_folds`` when
            ``self.n_folds > 0`` and ``-1`` otherwise. ``line_img`` is ``None``
            when ``text_only`` is set or ``self.mode`` is not in
            ``INPUT_PROCESSOR``.
        """
        loader = PageXMLDatasetLoader(self.mode, self._non_existing_as_empty,
                                      self.text_index, self.skip_invalid)
        image_path, xml_path, idx = sample

        needs_image = self.mode in INPUT_PROCESSOR
        img = load_image(image_path) if needs_image else None

        # A separate loop name keeps the ``sample`` parameter from being shadowed.
        for i, line in enumerate(loader.load(image_path, xml_path)):
            fold_id = (idx + i) % self.n_folds if self.n_folds > 0 else -1
            text = line["text"]
            orientation = line["orientation"]

            if not text_only and needs_image:
                _, lx = img.shape[:2]

                # rotate by orientation angle in clockwise direction to correct present skew
                angle = orientation if orientation and orientation % 360 != 0 else 0

                line_img = PageXMLReader.cutout(img,
                                                line['coords'],
                                                mode=CutMode.POLYGON,
                                                angle=angle,
                                                cval=None,
                                                scale=lx / line['img_width'])

                # add padding as required from normal files.
                # The padding belongs on the cut-out line: padding the page
                # image would leave the yielded line unpadded and grow the
                # page on every iteration, shifting all later cut-outs.
                pad = self.args.pad
                if pad:
                    line_img = np.pad(line_img,
                                      pad,
                                      mode='constant',
                                      constant_values=line_img.max())
            else:
                line_img = None

            yield InputSample(line_img, text,
                              SampleMeta(id=line['id'], fold_id=fold_id))
Exemplo n.º 9
0
class DefaultDataAugmenter(DataAugmenterBase[DefaultDataAugmenterParams]):
    """Data augmenter that degrades a line image using the bundled ocrodeg module."""

    def augment_single(self, data, gt_txt):
        """Return an augmented copy of ``data`` together with the unchanged text.

        The image is converted to float and scaled by its maximum, passed
        through ocrodeg's ``random_pad``, two rounds of ``distort_with_noise``
        (sigma 2 and 5) and ``printlike_multiscale``, then rescaled to a
        maximum of 255 and cast back to the input dtype.

        Args:
            data: Image array; ``data.shape[1]`` is read, so it needs at
                least two dimensions.
            gt_txt: Ground-truth text, returned as is.

        Returns:
            Tuple ``(augmented_image, gt_txt)``.
        """
        import calamari_ocr.thirdparty.ocrodeg as ocrodeg
        original_dtype = data.dtype
        # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the type
        # that alias stood for.
        data = data.astype(np.float64)
        m = data.max()
        data = data / (1 if m == 0 else m)
        data = ocrodeg.random_pad(data, (0, data.shape[1] * 2))
        # data = ocrodeg.transform_image(data, **ocrodeg.random_transform(rotation=(-0.1, 0.1), translation=(-0.1, 0.1)))
        for sigma in [2, 5]:
            noise = ocrodeg.bounded_gaussian_noise(data.shape, sigma, 3.0)
            data = ocrodeg.distort_with_noise(data, noise)

        data = ocrodeg.printlike_multiscale(data, blur=1, inverted=True)
        # Same zero guard as the first normalisation: an all-zero result must
        # not be divided by zero before the cast back to the input dtype.
        peak = data.max()
        data = (data * 255 / (1 if peak == 0 else peak)).astype(original_dtype)
        return data, gt_txt


if __name__ == '__main__':
    # Visual smoke test: show the original line next to four augmented copies.
    import matplotlib.pyplot as plt

    augmenter = DefaultDataAugmenterParams().create()
    page = load_image("../../test/data/uw3_50lines/train/010001.bin.png")
    # Average the first two channels and invert the result.
    original = 255 - np.mean(page[:, :, 0:2], axis=-1)
    # The augmenter is fed the transposed image; transpose its output back.
    variants = []
    for _ in range(4):
        augmented, _unused_text = augmenter.augment_single(original.T, '')
        variants.append(augmented.T)

    figure, axes = plt.subplots(5, 1)
    # First row: the original; remaining rows: the augmented copies.
    for axis, picture in zip(axes, [original] + variants):
        axis.imshow(255 - picture, cmap='gray')
    plt.show()
Exemplo n.º 10
0
        data = data.astype(np.float)
        m = data.max()
        data = data / (1 if m == 0 else m)
        data = ocrodeg.random_pad(data, (0, data.shape[1] * 2))
        # data = ocrodeg.transform_image(data, **ocrodeg.random_transform(rotation=(-0.1, 0.1), translation=(-0.1, 0.1)))
        for sigma in [2, 5]:
            noise = ocrodeg.bounded_gaussian_noise(data.shape, sigma, 3.0)
            data = ocrodeg.distort_with_noise(data, noise)

        data = ocrodeg.printlike_multiscale(data, blur=1, inverted=True)
        data = (data * 255 / data.max()).astype(original_dtype)
        return data, gt_txt


# Call register() on both augmenter classes when this module is imported
# (presumably this adds them to a lookup registry -- confirm in the base class).
NoopDataAugmenter.register()
SimpleDataAugmenter.register()

if __name__ == '__main__':
    # Visual smoke test: show the original line next to four augmented copies.
    augmenter = SimpleDataAugmenter()
    page = load_image("../../test/data/uw3_50lines/train/010001.bin.png")
    # Average the first two channels and invert the result.
    original = 255 - np.mean(page[:, :, 0:2], axis=-1)
    # The augmenter is fed the transposed image; transpose its output back.
    variants = []
    for _ in range(4):
        augmented, _unused_text = augmenter.augment_single(original.T, '')
        variants.append(augmented.T)

    import matplotlib.pyplot as plt
    figure, axes = plt.subplots(5, 1)
    # First row: the original; remaining rows: the augmented copies.
    for axis, picture in zip(axes, [original] + variants):
        axis.imshow(255 - picture, cmap='gray')
    plt.show()