Example no. 1
    def apply(self, sample: Sample) -> Sample:
        assert self.data_params.downscale_factor > 0  # Not instantiated yet
        codec = self.data_params.codec
        # final preparation
        if self.mode in {PipelineMode.TRAINING, PipelineMode.EVALUATION}:
            text = np.array(codec.encode(sample.targets) if sample.targets else np.zeros((0,), dtype="int32"))
        else:
            text = None

        line = sample.inputs

        # gray or binary input, add missing axis
        if len(line.shape) == 2:
            line = np.expand_dims(line, axis=-1)

        # Validate if the line is valid for training
        if not self.is_valid_line(
            text, len(line) // self.data_params.downscale_factor, len(line), sample.meta.get("id", "Unknown Sample ID")
        ):
            return sample.new_invalid()

        if text is not None:
            sample = sample.new_targets(
                {
                    "gt": np.asarray(text),
                    "gt_len": np.asarray([len(text)]),
                    "fold_id": np.asarray([sample.meta.get("fold_id", -1)]),
                }
            )

        return sample.new_inputs({"img": line.astype(np.uint8), "img_len": np.asarray([len(line)])})
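A minimal numpy-only sketch of the shape handling above (the array sizes are made up for illustration): a 2-D grayscale line gets a trailing channel axis, and img_len is its extent along the first axis.

import numpy as np

line = np.zeros((250, 48), dtype=np.uint8)  # hypothetical grayscale line, width x height
if len(line.shape) == 2:
    line = np.expand_dims(line, axis=-1)    # now (250, 48, 1)

inputs = {"img": line.astype(np.uint8), "img_len": np.asarray([len(line)])}
assert inputs["img_len"][0] == 250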
Example no. 2
    def vote_prediction_result_tuple(self, predictions):
        p = Prediction()
        p.is_voted_result = True
        self._apply_vote(predictions, p)

        # postprocessing after voting
        # option 1: Use custom text postprocessor
        # option 2: (Not implemented) Use only the first text postprocessor
        # option 3: Apply all known postprocessors and apply a sequence voting if different results are received
        if self.text_postproc:
            p.sentence = self.text_postproc.apply_on_sample(Sample(inputs="", outputs=p.sentence)).outputs
        else:
            sentences = [
                pred.text_postproc.apply_on_sample(Sample(inputs="", outputs=p.sentence)).outputs
                for pred in predictions
            ]

            if all([s == sentences[0] for s in sentences[1:]]):
                # usually all postproc should yield the same results
                p.sentence = sentences[0]
            else:
                # we need to vote again
                from calamari_ocr.ocr.voting import SequenceVoter

                sv = SequenceVoter()
                p.sentence = "".join([c for c, _ in sv.process_text(sentences)])

        p.avg_char_probability = 0
        for pos in p.positions:
            if len(pos.chars) > 0:
                p.avg_char_probability += pos.chars[0].probability
        p.avg_char_probability /= len(p.positions) if len(p.positions) > 0 else 1

        return p
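The SequenceVoter imported above resolves the case where the postprocessors disagree. As a purely illustrative stand-in (not the actual alignment-based algorithm), a per-position majority vote over equally long candidates looks like this:

from collections import Counter

def naive_majority_vote(sentences):
    # Illustration only: assumes all candidate sentences have the same length.
    return "".join(Counter(chars).most_common(1)[0][0] for chars in zip(*sentences))

assert naive_majority_vote(["hallo", "hello", "hellu"]) == "hello"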
Example no. 3
            def generate(self) -> Iterable[Sample]:
                # Depending on the mode, do not produce images or targets (force it for the future pipeline)
                if self.mode == PipelineMode.PREDICTION:
                    return map(lambda s: Sample(inputs=s.inputs, meta=s.meta), reader.generate())
                elif self.mode == PipelineMode.TARGETS:
                    return map(lambda s: Sample(targets=s.targets, meta=s.meta), reader.generate())

                return reader.generate()
Example no. 4
 def apply(self, sample: Sample) -> Sample:
     try:
         return sample.new_inputs(
             self._apply_single(sample.inputs, sample.meta))
     except Exception as e:
         logger.exception(e)
         logger.warning(
             "There was an unknown error when processing a line image. The line is skipped.\n"
             f"The error was caused by the line with meta data: {sample.meta}.\n"
             f"Please report this as an issue including the meta data, stack trace,  the respective "
             f"image file and call.\n"
             f"You can ignore this error if it occurs only very rarely, only this particular line will "
             f"be skipped.")
         return sample.new_invalid()
Example no. 5
    def apply(self, sample: Sample) -> Sample:
        inputs = sample.inputs
        outputs = sample.outputs
        assert inputs["img_len"].shape == (1, )
        inputs = inputs.copy()
        outputs = outputs.copy()
        inputs["img_len"] = inputs["img_len"][0]

        def reshape_outputs(suffix):
            out_len = "out_len" + suffix
            if out_len in outputs and outputs[out_len].shape == (1, ):
                outputs[out_len] = outputs[out_len][0]

            for name in {
                    "logits",
                    "softmax",
                    "blank_last_logits",
                    "blank_last_softmax",
            }:
                name += suffix
                if name in outputs:
                    outputs[name] = outputs[name][:outputs[out_len]]

        reshape_outputs("")
        for i in range(self.data_params.ensemble):
            reshape_outputs(f"_{i}")

        return sample.new_inputs(inputs).new_outputs(outputs)
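What the trimming above does, sketched with plain numpy (the shapes are assumptions): a padded per-frame output of shape (T_max, num_classes) is cut down to its true length out_len.

import numpy as np

outputs = {
    "softmax": np.random.rand(100, 80),  # hypothetical: T_max=100 frames, 80 classes
    "out_len": np.asarray([73]),         # actual number of frames for this line
}
out_len = outputs["out_len"][0]
outputs["out_len"] = out_len
outputs["softmax"] = outputs["softmax"][:out_len]
assert outputs["softmax"].shape == (73, 80)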
Example no. 6
    def vote(self, sample: Sample) -> Sample:
        inputs, outputs, meta = sample.inputs, sample.outputs, sample.meta
        prediction_results = []

        def out_to_in(x: int) -> int:
            return self.out_to_in_transformer.local_to_global(
                x,
                model_factor=inputs["img_len"] / prediction.logits.shape[0],
                data_proc_params=meta,
            )

        for i, (prediction, m, data, post_) in enumerate(zip(outputs, meta, self.datas, self.post_proc)):
            prediction.id = f"fold_{i}"
            prediction_results.append(
                PredictionResult(
                    prediction,
                    codec=data.params.codec,
                    text_postproc=post_,
                    out_to_in_trans=out_to_in,
                )
            )
        # vote the results (if only one model is given, this will just return the sentences)
        prediction = self.voter.vote_prediction_result(prediction_results)
        prediction.id = "voted"
        return Sample(inputs=inputs, outputs=(prediction_results, prediction), meta=meta[0])
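The out_to_in closure above maps a position in the network output back to a position in the input image. Stripped of the data-processing parameters, the core of that mapping is a rescale by model_factor; a hypothetical sketch with made-up lengths:

img_len = 1000      # input width in pixels (assumed)
logits_len = 250    # number of output frames produced by the network (assumed)
model_factor = img_len / logits_len

def out_to_in(x: int) -> int:
    # Illustration only: the real transformer also undoes the preprocessing steps.
    return int(round(x * model_factor))

assert out_to_in(125) == 500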
Example no. 7
 def apply(self, sample: Sample) -> Sample:
     # data augmentation
     if not self.data_aug_params.no_augs() \
             and sample.inputs is not None \
             and self.data_augmenter \
             and np.random.rand() <= self.data_aug_params.to_rel():
         line, text = self.augment(sample.inputs, sample.targets, sample.meta)
         return sample.new_inputs(line).new_targets(text)
     return sample
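The np.random.rand() <= self.data_aug_params.to_rel() comparison above gates augmentation so that only a configured fraction of samples is augmented; a quick numpy sketch of that gate (the rate is an assumption):

import numpy as np

aug_rate = 0.5  # hypothetical relative augmentation rate
hits = sum(np.random.rand() <= aug_rate for _ in range(10_000))
print(hits / 10_000)  # roughly 0.5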
Example no. 8
 def apply(self, sample: Sample) -> Sample:
     targets: str = sample.targets
     outputs: str = sample.outputs
     meta = sample.meta
     if isinstance(outputs, Prediction):
         prediction: Prediction = outputs
         prediction.sentence = self._apply_single(prediction.sentence, meta)
         return sample
     elif isinstance(targets, dict) and "sentence" in targets:
         targets["sentence"] = self._apply_single(targets["sentence"], meta)
         return sample
     elif isinstance(outputs, dict) and "sentence" in outputs:
         outputs["sentence"] = self._apply_single(outputs["sentence"], meta)
         return sample
     else:
         if targets:
             sample = sample.new_targets(self._apply_single(targets, meta))
         if outputs:
             sample = sample.new_outputs(self._apply_single(outputs, meta))
         return sample
Example no. 9
    def multi_augment(self, sample: Sample, n_augmentations=1, include_non_augmented=True):
        if include_non_augmented:
            out = [sample]
        else:
            out = []

        for n in range(n_augmentations):
            meta = copy.deepcopy(sample.meta)
            l, t = self.augment(sample.inputs, sample.targets, meta)
            out.append(Sample(inputs=l, targets=t, meta=meta))

        return out
Example no. 10
    def apply(self, sample: Sample) -> Sample:
        if sample.targets and 'gt' in sample.targets:
            sample.targets['sentence'] = "".join(
                self.data_params.codec.decode(sample.targets['gt']))
        if sample.outputs:

            def decode(suffix):
                outputs = self.ctc_decoder.decode(
                    sample.outputs['softmax' + suffix].astype(float))
                outputs.labels = list(map(int, outputs.labels))
                outputs.sentence = "".join(
                    self.data_params.codec.decode(outputs.labels))
                return outputs

            outputs = decode("")
            outputs.voter_predictions = []
            for i in range(self.data_params.ensemble):
                outputs.voter_predictions.append(decode(f"_{i}"))

            sample = sample.new_outputs(outputs)
        return sample
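For orientation, a greedy (best-path) CTC decode can be sketched in a few lines of numpy. This is an illustrative stand-in, not the ctc_decoder used above; the blank label is assumed here to be index 0.

import numpy as np

def greedy_ctc_decode(softmax: np.ndarray, blank: int = 0) -> list:
    best = np.argmax(softmax, axis=1)     # most likely label per frame
    labels, prev = [], None
    for lbl in best:
        if lbl != prev and lbl != blank:  # collapse repeated labels and drop blanks
            labels.append(int(lbl))
        prev = lbl
    return labels

probs = np.eye(3)[[0, 1, 1, 0, 2]]  # one-hot frames: blank, a, a, blank, b
assert greedy_ctc_decode(probs) == [1, 2]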
Example no. 11
    def apply(self, sample: Sample) -> Sample:
        assert (self.data_params.downscale_factor > 0)  # Not instantiated yet
        codec = self.data_params.codec
        # final preparation
        text = np.array(
            codec.encode(sample.targets) if sample.targets else np.zeros(
                (0, ), dtype='int32'))
        line = sample.inputs

        # gray or binary input, add missing axis
        if len(line.shape) == 2:
            line = np.expand_dims(line, axis=-1)

        if self.mode in {PipelineMode.TRAINING, PipelineMode.EVALUATION
                         } and not self.is_valid_line(
                             text,
                             len(line) // self.data_params.downscale_factor):
            # skip longer outputs than inputs (also in evaluation due to loss computation)
            logger.warning(
                f"Skipping line with longer outputs than inputs (id={sample.meta['id']})"
            )
            return sample.new_invalid()

        if self.mode in {PipelineMode.TRAINING, PipelineMode.EVALUATION
                         } and len(text) == 0:
            logger.warning(
                f"Skipping empty line with empty GT (id={sample.meta['id']})")
            return sample.new_invalid()

        return sample.new_inputs({
            'img': line.astype(np.uint8),
            'img_len': [len(line)]
        }).new_targets({
            'gt': text,
            'gt_len': [len(text)],
            'fold_id': [sample.meta.get('fold_id', -1)]
        })
Example no. 12
    def apply(self, sample: Sample) -> Optional[Sample]:
        # Apply the complete list of data processors.
        # Invalid samples return None.

        if sample.meta is None:
            sample = sample.new_meta({})

        if not self.is_valid_sample(sample):
            return None

        for p in self.processors:
            sample = p(sample)
            if not self.is_valid_sample(sample):
                return None

        return sample
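The loop above is plain function composition with an early exit on invalid samples; the pattern itself can be shown self-contained (the processors and validity check below are made up):

def run_pipeline(sample, processors, is_valid):
    if not is_valid(sample):
        return None
    for p in processors:
        sample = p(sample)
        if not is_valid(sample):
            return None
    return sample

# Hypothetical usage with strings standing in for samples; an empty string counts as invalid.
assert run_pipeline("  Some Text ", [str.strip, str.lower], bool) == "some text"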
Example no. 13
    def __init__(
        self,
        prediction,
        codec,
        text_postproc,
        out_to_in_trans: Callable[[int], int],
        ground_truth=None,
    ):
        """The output of a networks prediction (PredictionProto) with additional information

        It stores all required information for decoding (`codec`) and interpreting the output.

        Parameters
        ----------
        prediction : PredictionProto
            prediction the DNN
        codec : Codec
            codec required to decode the `prediction`
        text_postproc : TextPostprocessor
            text processor to apply to the decodec `prediction` to receive the actual prediction sentence
        """
        self.prediction = prediction
        self.logits = prediction.logits
        self.codec = codec
        self.text_postproc = text_postproc
        self.chars = codec.decode(prediction.labels)
        self.sentence = self.text_postproc.apply_on_sample(Sample(inputs="", outputs="".join(self.chars))).outputs
        self.prediction.sentence = self.sentence
        self.out_to_in_trans = out_to_in_trans
        self.ground_truth = ground_truth

        self.prediction.avg_char_probability = 0

        for p in self.prediction.positions:
            for c in p.chars:
                c.char = codec.code2char[c.label]

            p.global_start = int(self.out_to_in_trans(p.local_start))
            p.global_end = int(self.out_to_in_trans(p.local_end))
            if len(p.chars) > 0:
                self.prediction.avg_char_probability += p.chars[0].probability

        self.prediction.avg_char_probability /= (
            len(self.prediction.positions) if len(self.prediction.positions) > 0 else 1
        )
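The avg_char_probability computed above is the mean, over all positions, of the probability of the top character candidate; numerically, with made-up values:

top_char_probs = [0.99, 0.85, 0.97]  # probability of the best candidate at each position
avg_char_probability = sum(top_char_probs) / (len(top_char_probs) or 1)
assert abs(avg_char_probability - 2.81 / 3) < 1e-9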
Example no. 14
    def apply(self, sample: Sample) -> Sample:
        inputs = sample.inputs
        outputs = sample.outputs
        assert (inputs['img_len'].shape == (1,))
        inputs = inputs.copy()
        outputs = outputs.copy()
        inputs['img_len'] = inputs['img_len'][0]

        def reshape_outputs(suffix):
            out_len = 'out_len' + suffix
            if out_len in outputs and outputs[out_len].shape == (1,):
                outputs[out_len] = outputs[out_len][0]

            for name in {'logits', 'softmax', 'blank_last_logits', 'blank_last_softmax'}:
                name += suffix
                if name in outputs:
                    outputs[name] = outputs[name][:outputs[out_len]]

        reshape_outputs('')
        for i in range(self.data_params.ensemble):
            reshape_outputs(f"_{i}")

        return sample.new_inputs(inputs).new_outputs(outputs)
Example no. 15
 def apply_on_sample(self, sample: Sample) -> Sample:
     if sample.meta is None:
         sample = sample.new_meta({})
     return self.apply(sample.copy())
Example no. 16
def is_valid_sample(sample: Sample, mode: PipelineMode) -> bool:
    if sample is None:
        return False
    return sample.is_valid(mode)
Example no. 17
 def to_input_target_sample(self) -> Sample:
     return Sample(inputs=self.image, targets=self.gt, meta=self.meta.to_dict())
Example no. 18
 def apply(self, sample: Sample) -> Sample:
     return sample.new_inputs(self._apply_single(sample.inputs,
                                                 sample.meta))
Example no. 19
    @staticmethod
    def cls() -> Type['TextProcessor']:
        return TextRegularizerProcessor


class TextRegularizerProcessor(TextProcessor[TextRegularizerProcessorParams]):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.params.replacements is None:
            self.params.replacements = default_text_regularizer_replacements(
                self.params.replacement_groups)

    def _apply_single(self, txt, meta):
        for replacement in self.params.replacements:
            if replacement.regex:
                txt = re.sub(replacement.old, replacement.new, txt)
            else:
                txt = txt.replace(replacement.old, replacement.new)

        return txt


if __name__ == "__main__":
    n = TextRegularizerProcessorParams(
        replacement_groups=["quotes", "spaces"]).create(
            None, mode=PipelineMode.TRAINING)
    assert (n(
        Sample(targets="“Resolve quotes”")).targets == "''Resolve quotes''")
    assert (n(Sample(
        targets="  “Resolve   spaces  ”   ")).targets == "''Resolve spaces ''")