Пример #1
0
class TextSeparation(object):
    """Pipeline step that runs a restored ``TFUnet`` model on a single image.

    Construction loads the model configuration from ``config["model_path"]``
    and builds, in order, the slice dataset, the network and the executor.
    Calling the instance processes one image and returns a ``uint8`` array;
    ``close()`` closes the executor.
    """

    name = "Text Separation"

    def __init__(self, globalConfig=None, config=None):
        """Build dataset, algorithm and executor from the given settings.

        Args:
            globalConfig: Mapping of global overrides merged with
                ``GLOBAL_DEFAULTS``; the keys read in this class are
                ``gpu``, ``hardplacement`` and ``logplacement``. ``None``
                (the default) means "no overrides".
            config: Mapping of step overrides merged with ``DEFAULTS``; the
                keys read in this class are ``model_path``, ``model_epoch``
                and ``binarize_method``. ``None`` means "no overrides".
        """
        # Use fresh dicts instead of mutable default arguments: a ``{}``
        # default is created once and shared by every call that omits it.
        globalConfig = {} if globalConfig is None else globalConfig
        config = {} if config is None else config
        self.globalConfig = Configuration(globalConfig, GLOBAL_DEFAULTS)
        self.config = Configuration(config, DEFAULTS)
        self.modelConfig = Configuration.load(self.config["model_path"],
                                              "algorithm")
        # The executor step needs both the algorithm and the dataset, so it
        # has to come last.
        self._configure_dataset()
        self._configure_algorithm()
        self._configure_executor()

    def _configure_algorithm(self):
        """Create the ``TFUnet`` and configure it with the model slice size."""
        self.algorithm = TFUnet(self.modelConfig["algo_config"])
        # A ``gpu`` setting of -1 is treated as "run on CPU".
        self.algorithm.set_cpu(self.globalConfig['gpu'] == -1)
        self.algorithm.configure(
            slice_width=self.modelConfig['data_config.slice_width'],
            slice_height=self.modelConfig['data_config.slice_height'])

    def _configure_dataset(self):
        """Create the single-image slice dataset used to feed the model."""
        self.dataset = PaperNoteSlicesSingle(
            slice_width=self.modelConfig['data_config.slice_width'],
            slice_height=self.modelConfig['data_config.slice_height'],
            # Falls back to False when the model config has no "binary" key.
            binarize=self.modelConfig.default("binary", False),
            binarize_method=self.config["binarize_method"])

    def _configure_executor(self):
        """Create the executor, restore the checkpoint and set up the runner.

        The checkpoint path is ``<model_path>/model-<model_epoch>``.
        """
        self.executor = Executor(self.algorithm, False, self.globalConfig)
        self.executor.configure(
            softplacement=not self.globalConfig["hardplacement"],
            logplacement=self.globalConfig["logplacement"],
            device=self.globalConfig["gpu"])
        checkpoint = os.path.join(
            self.config["model_path"],
            "model-{}".format(self.config["model_epoch"]))
        self.executor.restore(checkpoint)
        self.separator = SeparationRunner(config=self.modelConfig,
                                          dataset=self.dataset,
                                          subset="")
        self.executables = [self.separator]

    def __call__(self, image, file):
        """Process ``image`` with the model and return the resulting image.

        Args:
            image: Image passed to ``self.dataset.set_image``.
            file: Not used by this method.
                NOTE(review): presumably kept so that all pipeline steps
                share one call signature - confirm against the caller.

        Returns:
            A ``numpy.uint8`` array equal to
            ``255 - (1 - merged) * (255 - original)``, where ``merged`` is
            the merged per-pixel label map. A label of 1 therefore yields
            255 and a label of 0 keeps the original pixel value.
        """
        # Keep an independent copy of what the dataset returns for the image.
        original = self.dataset.set_image(image).copy()
        # auto_close=False leaves the executor open for further calls;
        # it is closed explicitly through close().
        self.executor(self.executables, auto_close=False)
        # Arg-max over axis 3 of the stacked runner outputs.
        # NOTE(review): axis 3 is presumably the per-class score axis.
        labels = np.argmax(np.asarray(self.separator.outputs), 3)
        merged = self.dataset.merge_slices(labels, original.shape)
        output = (255 - (1 - merged) * (255 - original))
        return np.uint8(output)

    def close(self):
        """Close the underlying executor."""
        self.executor.close()
Пример #2
0
class TranscriptionAndClassification(object):
    """Pipeline step that transcribes regions with a restored ``HtrNet``.

    Construction loads the model configuration from ``config["model_path"]``
    and builds, in order, the region dataset, the network and the executor.
    Calling the instance writes the transcription (and, when
    ``config["classify"]`` is truthy, a class score) onto every region.
    """

    def __init__(self, globalConfig=None, config=None):
        """Build dataset, algorithm and executor from the given settings.

        Args:
            globalConfig: Mapping of global overrides merged with
                ``GLOBAL_DEFAULTS``; the keys read in this class are
                ``gpu``, ``hardplacement`` and ``logplacement``. ``None``
                (the default) means "no overrides".
            config: Mapping of step overrides merged with ``DEFAULTS``; the
                keys read in this class are ``model_path``, ``model_epoch``,
                ``scaling``, ``max_height``, ``max_width``, ``classify`` and
                ``class_thresh``. ``None`` means "no overrides".
        """
        # Use fresh dicts instead of mutable default arguments: a ``{}``
        # default is created once and shared by every call that omits it.
        globalConfig = {} if globalConfig is None else globalConfig
        config = {} if config is None else config
        self.globalConfig = Configuration(globalConfig, GLOBAL_DEFAULTS)
        self.config = Configuration(config, DEFAULTS)
        self.modelConfig = Configuration.load(self.config["model_path"],
                                              "algorithm")
        # Order matters: the algorithm reads attributes of self.dataset and
        # the executor needs the algorithm.
        self._configure_dataset()
        self._configure_algorithm()
        self._configure_executor()

    def _configure_algorithm(self):
        """Create the ``HtrNet`` and size it from the model config and dataset."""
        self.algorithm = HtrNet(self.modelConfig["algo_config"])
        # A ``gpu`` setting of -1 is treated as "run on CPU".
        self.algorithm.set_cpu(self.globalConfig['gpu'] == -1)
        self.algorithm.configure(
            batch_size=self.modelConfig['batch'],
            learning_rate=self.modelConfig['learning_rate'],
            sequence_length=self.dataset.max_length,
            image_height=self.dataset.meta["height"],
            image_width=self.dataset.meta["width"],
            vocab_length=self.dataset.vocab_length,
            channels=self.dataset.channels,
            # Falls back to the general learning rate when the model config
            # has no dedicated 'class_learning_rate'.
            class_learning_rate=self.modelConfig.default(
                'class_learning_rate', self.modelConfig['learning_rate']))

    def _configure_dataset(self):
        """Create the region dataset and apply the configured scaling."""
        # NOTE(review): the meaning of the leading ``None`` argument is not
        # visible here - check the RegionDataset signature.
        self.dataset = RegionDataset(
            None,
            self.config["model_path"],
            data_config=self.modelConfig["data_config"])
        self.dataset.scaling(self.config["scaling"], self.config["max_height"],
                             self.config["max_width"])

    def _configure_executor(self):
        """Create the executor, restore the checkpoint and pick the runner.

        The checkpoint path is ``<model_path>/model-<model_epoch>``. A
        ``RecClassRunner`` is used when ``config["classify"]`` is truthy,
        otherwise a ``RecognitionRunner``.
        """
        self.executor = Executor(self.algorithm, False, self.globalConfig)
        self.executor.configure(
            softplacement=not self.globalConfig["hardplacement"],
            logplacement=self.globalConfig["logplacement"],
            device=self.globalConfig["gpu"])
        checkpoint = os.path.join(
            self.config["model_path"],
            "model-{}".format(self.config["model_epoch"]))
        self.executor.restore(checkpoint)
        if self.config["classify"]:
            self.transcriber = RecClassRunner(self.dataset,
                                              config=self.modelConfig)
        else:
            self.transcriber = RecognitionRunner(self.dataset,
                                                 config=self.modelConfig)
        self.executables = [self.transcriber]

    def __call__(self, images, file):
        """Transcribe ``images`` in place and return them.

        Args:
            images: Indexable collection of region objects. Each one used
                must provide ``set_text`` and, when classification is
                enabled, ``set_class``.
            file: Not used by this method.
                NOTE(review): presumably kept so that all pipeline steps
                share one call signature - confirm against the caller.

        Returns:
            The same ``images`` object, after ``set_text`` (and optionally
            ``set_class``) has been called on each transcribed region.
        """
        self.dataset.set_regions(images)
        # auto_close=False leaves the executor open for further calls;
        # it is closed explicitly through close().
        self.executor(self.executables, auto_close=False)
        transcriptions = self.transcriber.transcriptions
        classify = self.config["classify"]
        for idx, text in enumerate(transcriptions['trans']):
            region = images[idx]
            # Second argument is the dataset's decompiled form of ``text``.
            region.set_text(text, self.dataset.decompile(text))
            if classify:
                score = transcriptions['class'][idx]
                region.set_class(score,
                                 score > self.config["class_thresh"])
        return images

    def close(self):
        """Close the underlying executor."""
        self.executor.close()