Пример #1
0
            def generate(self) -> Iterable[Sample]:
                """Return the wrapped reader's samples, reduced according to the pipeline mode.

                ``reader`` is taken from the enclosing scope. In prediction mode
                only ``inputs`` and ``meta`` are forwarded; in targets mode only
                ``targets`` and ``meta``. In any other mode the reader's samples
                are returned unchanged.
                """
                def inputs_only(s):
                    # Drop targets: they must not be produced when predicting.
                    return Sample(inputs=s.inputs, meta=s.meta)

                def targets_only(s):
                    # Drop inputs: only the targets are requested.
                    return Sample(targets=s.targets, meta=s.meta)

                if self.mode == PipelineMode.Prediction:
                    return map(inputs_only, reader.generate())
                if self.mode == PipelineMode.Targets:
                    return map(targets_only, reader.generate())
                return reader.generate()
Пример #2
0
 def apply(self, sample: Sample) -> Sample:
     """Decode ground truth and network outputs of ``sample`` into sentences.

     If the targets contain a ``'gt'`` entry, its decoded text is stored
     in place under ``targets['sentence']``. If outputs are present, the
     ``'softmax'`` entry is run through the CTC decoder, and the decoded
     result (with int labels and a ``sentence``) replaces the outputs.
     """
     targets = sample.targets
     if targets and 'gt' in targets:
         gt_chars = self.params.codec.decode(targets['gt'])
         targets['sentence'] = "".join(gt_chars)

     if not sample.outputs:
         return sample

     probabilities = sample.outputs['softmax'].astype(float)
     decoded = self.ctc_decoder.decode(probabilities)
     # Convert the labels to plain Python ints before decoding to characters.
     decoded.labels = [int(label) for label in decoded.labels]
     decoded.sentence = "".join(self.params.codec.decode(decoded.labels))
     return sample.new_outputs(decoded)
Пример #3
0
    def apply(self, sample: Sample) -> Sample:
        """Strip the length-1 wrappers from a sample and trim outputs to their length.

        ``img_len`` and ``meta`` of the inputs must have shape ``(1,)`` and
        are replaced by their single element. For the unsuffixed outputs and
        for every ensemble suffix ``_0 .. _{ensemble_-1}``, ``out_len<suffix>``
        is unwrapped when it has shape ``(1,)``, and the logits/softmax
        entries that are present are cut to that length. Works on shallow
        copies of the input and output dicts.
        """
        assert sample.inputs['img_len'].shape == (1,)
        assert sample.inputs['meta'].shape == (1,)
        flat_inputs = sample.inputs.copy()
        flat_outputs = sample.outputs.copy()
        flat_inputs['img_len'] = flat_inputs['img_len'][0]
        flat_inputs['meta'] = flat_inputs['meta'][0]

        sequence_keys = ('logits', 'softmax', 'blank_last_logits', 'blank_last_softmax')

        def trim(suffix):
            length_key = 'out_len' + suffix
            if length_key in flat_outputs and flat_outputs[length_key].shape == (1,):
                flat_outputs[length_key] = flat_outputs[length_key][0]

            for base in sequence_keys:
                key = base + suffix
                if key in flat_outputs:
                    flat_outputs[key] = flat_outputs[key][:flat_outputs[length_key]]

        # Unsuffixed entries first, then one set per ensemble member.
        trim('')
        for i in range(self.params.ensemble_):
            trim(f"_{i}")

        return sample.new_inputs(flat_inputs).new_outputs(flat_outputs)
Пример #4
0
    def vote(self, sample: Sample) -> Sample:
        """Wrap every fold prediction in a ``PredictionResult`` and vote them into one.

        Parameters
        ----------
        sample : Sample
            ``inputs`` must provide ``'meta'`` (a JSON string) and
            ``'img_len'``; ``outputs`` is iterated as one prediction per fold.

        Returns
        -------
        Sample
            ``outputs`` is the tuple ``(prediction_results, voted_prediction)``
            and ``meta`` is the parsed JSON of ``inputs['meta']``.
        """
        inputs, outputs, meta = sample.inputs, sample.outputs, sample.meta
        input_meta = json.loads(inputs['meta'])

        def make_out_to_in(fold_prediction):
            # Bind the fold's own prediction explicitly. A closure over the
            # loop variable would late-bind and, once the loop has advanced
            # (or the name was rebound to the voted result), compute the model
            # factor from another prediction's logits.
            def out_to_in(x: int) -> int:
                return self.out_to_in_transformer.local_to_global(
                    x,
                    model_factor=inputs['img_len'] / fold_prediction.logits.shape[0],
                    data_proc_params=input_meta)

            return out_to_in

        prediction_results = []
        # ``meta`` stays in the zip (although its items are unused) so that the
        # number of processed folds is still bounded by its length.
        for i, (prediction, _, data, post_) in enumerate(
                zip(outputs, meta, self.datas, self.post_proc)):
            prediction.id = "fold_{}".format(i)
            prediction_results.append(
                PredictionResult(
                    prediction,
                    codec=data.params().codec,
                    text_postproc=post_,
                    out_to_in_trans=make_out_to_in(prediction),
                ))

        # vote the results (if only one model is given, this will just return the sentences)
        voted = self.voter.vote_prediction_result(prediction_results)
        voted.id = "voted"
        return Sample(inputs=inputs,
                      outputs=(prediction_results, voted),
                      meta=input_meta)
Пример #5
0
    def apply(self, sample: Sample) -> Sample:
        """Decode ground truth and all softmax outputs (main and per-voter) of ``sample``.

        If the targets contain a ``'gt'`` entry, its decoded text is stored in
        place under ``targets['sentence']``. If outputs are present,
        ``'softmax'`` is CTC-decoded into the main result, and
        ``'softmax_<i>'`` for each ``i < ensemble_`` is decoded into
        ``voter_predictions`` of that result, which replaces the outputs.
        """
        if sample.targets and 'gt' in sample.targets:
            gt_chars = self.params.codec.decode(sample.targets['gt'])
            sample.targets['sentence'] = "".join(gt_chars)

        if not sample.outputs:
            return sample

        def decode_head(suffix):
            probabilities = sample.outputs['softmax' + suffix].astype(float)
            decoded = self.ctc_decoder.decode(probabilities)
            # Convert the labels to plain Python ints before decoding to characters.
            decoded.labels = [int(label) for label in decoded.labels]
            decoded.sentence = "".join(self.params.codec.decode(decoded.labels))
            return decoded

        main_result = decode_head("")
        main_result.voter_predictions = [
            decode_head(f"_{i}") for i in range(self.params.ensemble_)
        ]
        return sample.new_outputs(main_result)
Пример #6
0
 def apply(self, sample: Sample) -> Sample:
     """Randomly replace the sample's inputs and targets by an augmented version.

     Augmentation only happens when augmentations are configured, the
     sample has inputs, a data augmenter is set, and a uniform random draw
     is at most ``data_aug_params.to_rel()``. Otherwise the sample is
     returned unchanged.
     """
     aug_params = self.params.data_aug_params
     # The random draw stays last in the chain so it is only consumed when
     # all cheaper preconditions hold.
     should_augment = (
         not aug_params.no_augs()
         and sample.inputs is not None
         and self.data_augmenter
         and np.random.rand() <= aug_params.to_rel()
     )
     if not should_augment:
         return sample

     line, text = self.augment(sample.inputs, sample.targets, sample.meta)
     return sample.new_inputs(line).new_targets(text)
Пример #7
0
    def apply(self, sample: Sample) -> Sample:
        """Convert a sample to the dict-based inputs/targets form.

        The targets are encoded with the codec (an empty int32 array if there
        are none), and a 2D line gets a trailing channel axis. In training and
        evaluation mode, lines rejected by ``is_valid_line`` or with empty
        ground truth are logged and returned as invalid samples.

        Raises
        ------
        ValueError
            If the channel count of the line differs from
            ``params.input_channels``.
        """
        codec = self.params.codec
        # final preparation
        if sample.targets:
            encoded = codec.encode(sample.targets)
        else:
            encoded = np.zeros((0, ), dtype='int32')
        text = np.array(encoded)

        line = sample.inputs
        # gray or binary input, add missing axis
        if len(line.shape) == 2:
            line = np.expand_dims(line, axis=-1)

        if line.shape[-1] != self.params.input_channels:
            raise ValueError(
                f"Expected {self.params.input_channels} channels but got {line.shape[-1]}. Shape of input {line.shape}"
            )

        if self.mode in {PipelineMode.Training, PipelineMode.Evaluation}:
            max_output_len = len(line) // self.params.downscale_factor_
            if not self.is_valid_line(text, max_output_len):
                # skip longer outputs than inputs (also in evaluation due to loss computation)
                logger.warning(
                    f"Skipping line with longer outputs than inputs (id={sample.meta['id']})"
                )
                return sample.new_invalid()

            if len(text) == 0:
                logger.warning(
                    f"Skipping empty line with empty GT (id={sample.meta['id']})")
                return sample.new_invalid()

        prepared_inputs = {
            'img': line.astype(np.uint8),
            'img_len': [len(line)],
            'meta': [json.dumps(sample.meta)],
        }
        prepared_targets = {
            'gt': text,
            'gt_len': [len(text)],
            'fold_id': [sample.meta.get('fold_id', -1)],
        }
        return sample.new_inputs(prepared_inputs).new_targets(prepared_targets)
Пример #8
0
 def apply(self, sample: Sample) -> Sample:
     """Run ``_apply_single`` on whichever textual payload the sample carries.

     Checked in this order, and only the first match is processed:
     a ``Prediction`` as outputs (its ``sentence`` is updated in place),
     a targets dict with ``'sentence'``, an outputs dict with
     ``'sentence'``. Otherwise targets and outputs are processed directly
     when truthy, and a new sample is created for each.
     """
     targets = sample.targets
     outputs = sample.outputs
     meta = sample.meta

     if isinstance(outputs, Prediction):
         outputs.sentence = self._apply_single(outputs.sentence, meta)
         return sample

     # Targets take precedence over outputs; only the first match is updated.
     for container in (targets, outputs):
         if isinstance(container, dict) and 'sentence' in container:
             container['sentence'] = self._apply_single(container['sentence'], meta)
             return sample

     if targets:
         sample = sample.new_targets(self._apply_single(targets, meta))
     if outputs:
         sample = sample.new_outputs(self._apply_single(outputs, meta))
     return sample
Пример #9
0
    def vote_prediction_result_tuple(self, predictions):
        """Vote ``predictions`` into one ``Prediction`` and post-process its sentence.

        With a custom ``self.text_postproc`` set, only that processor is
        applied. Otherwise every prediction's own post-processor is applied to
        the voted sentence; if their results differ, they are combined by a
        sequence vote. Finally ``avg_char_probability`` is set from the first
        character of each position, divided by the number of positions.
        """
        voted = Prediction()
        voted.is_voted_result = True
        self._apply_vote(predictions, voted)

        # postprocessing after voting
        # option 1: Use custom text postprocessor
        # option 2: (Not implemented) Use only the first text postprocessor
        # option 3: Apply all known postprocessors and apply a sequence voting if different results are received
        if self.text_postproc:
            processed = self.text_postproc.apply(
                Sample(inputs='', outputs=voted.sentence))
            voted.sentence = processed.outputs
        else:
            candidates = []
            for pred in predictions:
                processed = pred.text_postproc.apply(
                    Sample(inputs='', outputs=voted.sentence))
                candidates.append(processed.outputs)

            if all(s == candidates[0] for s in candidates[1:]):
                # usually all postproc should yield the same results
                voted.sentence = candidates[0]
            else:
                # we need to vote again
                from calamari_ocr.ocr.voting import SequenceVoter
                sequence_voter = SequenceVoter()
                voted_chars = [c for c, _ in sequence_voter.process_text(candidates)]
                voted.sentence = "".join(voted_chars)

        voted.avg_char_probability = 0
        for position in voted.positions:
            if len(position.chars) > 0:
                voted.avg_char_probability += position.chars[0].probability
        n_positions = len(voted.positions)
        # Divide by 1 when there are no positions to avoid a zero division.
        voted.avg_char_probability /= n_positions if n_positions > 0 else 1

        return voted
Пример #10
0
    def multi_augment(self,
                      sample: Sample,
                      n_augmentations=1,
                      include_non_augmented=True):
        """Create ``n_augmentations`` augmented samples from ``sample``.

        Each augmentation gets its own deep copy of the sample's meta. When
        ``include_non_augmented`` is true, the original sample is the first
        element of the returned list.
        """
        results = [sample] if include_non_augmented else []

        for _ in range(n_augmentations):
            meta_copy = copy.deepcopy(sample.meta)
            aug_line, aug_text = self.augment(sample.inputs, sample.targets,
                                              meta_copy)
            results.append(
                Sample(inputs=aug_line, targets=aug_text, meta=meta_copy))

        return results
Пример #11
0
    def apply(self, sample: Sample) -> Sample:
        """Strip the length-1 wrappers from a sample and trim outputs to ``out_len``.

        ``img_len`` and ``meta`` of the inputs must have shape ``(1,)`` and
        are replaced by their single element. ``out_len`` is unwrapped when it
        has shape ``(1,)``, and the logits/softmax entries that are present
        are cut to that length. Works on shallow copies of the input and
        output dicts.
        """
        assert sample.inputs['img_len'].shape == (1, )
        assert sample.inputs['meta'].shape == (1, )
        flat_inputs = sample.inputs.copy()
        flat_outputs = sample.outputs.copy()
        flat_inputs['img_len'] = flat_inputs['img_len'][0]
        flat_inputs['meta'] = flat_inputs['meta'][0]

        if 'out_len' in flat_outputs and flat_outputs['out_len'].shape == (1, ):
            flat_outputs['out_len'] = flat_outputs['out_len'][0]

        sequence_keys = ('logits', 'softmax', 'blank_last_logits',
                         'blank_last_softmax')
        for key in sequence_keys:
            if key in flat_outputs:
                flat_outputs[key] = flat_outputs[key][:flat_outputs['out_len']]

        return sample.new_inputs(flat_inputs).new_outputs(flat_outputs)
Пример #12
0
    def __init__(self,
                 prediction,
                 codec,
                 text_postproc,
                 out_to_in_trans: Callable[[int], int],
                 ground_truth=None):
        """ The output of a network's prediction with additional information

        It stores all required information for decoding (`codec`) and interpreting the output.
        The passed `prediction` is updated in place: its `sentence`,
        `avg_char_probability`, and the `char`, `global_start` and `global_end`
        of its positions are filled in.

        Parameters
        ----------
        prediction : PredictionProto
            prediction of the DNN
        codec : Codec
            codec required to decode the `prediction`
        text_postproc : TextPostprocessor
            text processor to apply to the decoded `prediction` to receive the actual prediction sentence
        out_to_in_trans : Callable[[int], int]
            called with the local start/end of each position; the result is stored as its global start/end
        ground_truth : optional
            stored unchanged as `ground_truth`
        """
        self.prediction = prediction
        self.logits = prediction.logits
        self.codec = codec
        self.text_postproc = text_postproc
        self.out_to_in_trans = out_to_in_trans
        self.ground_truth = ground_truth

        # Decode the labels and post-process the joined characters.
        self.chars = codec.decode(prediction.labels)
        raw_sentence = "".join(self.chars)
        self.sentence = self.text_postproc.apply(
            Sample(inputs='', outputs=raw_sentence)).outputs
        self.prediction.sentence = self.sentence

        self.prediction.avg_char_probability = 0
        for position in self.prediction.positions:
            for char in position.chars:
                char.char = codec.code2char[char.label]

            position.global_start = int(self.out_to_in_trans(position.local_start))
            position.global_end = int(self.out_to_in_trans(position.local_end))
            if len(position.chars) > 0:
                self.prediction.avg_char_probability += position.chars[0].probability

        n_positions = len(self.prediction.positions)
        # Divide by 1 when there are no positions to avoid a zero division.
        self.prediction.avg_char_probability /= n_positions if n_positions > 0 else 1
Пример #13
0
 def to_input_target_sample(self) -> Sample:
     """Build a ``Sample`` from this object's image, ground truth and meta dict."""
     fields = {
         'inputs': self.image,
         'targets': self.gt,
         'meta': self.meta.to_dict(),
     }
     return Sample(**fields)
Пример #14
0
 def apply(self, sample: Sample) -> Sample:
     """Return a new sample whose inputs are ``_apply_single(inputs, meta)``."""
     processed_inputs = self._apply_single(sample.inputs, sample.meta)
     return sample.new_inputs(processed_inputs)