示例#1
0
    def to_train_val(
            self,
            locks: List[LockState],
            shuffle: bool = True,
            books: List[DatabaseBook] = None
    ) -> Tuple[List[PcGts], List[PcGts]]:
        """Split the locked pages of the given books into train/val sets.

        When ``includeAllTrainingData`` is set on the params, the passed
        *books* are ignored and every available book is used instead.
        The split ratio is taken from ``self.nTrain``.
        """
        selected_books = (
            DatabaseBook.list_available()
            if self.includeAllTrainingData
            else books
        )
        return dataset_by_locked_pages(self.nTrain, locks, shuffle, selected_books)
示例#2
0
        print("Training the pixel classifier")
        self.pc_trainer.run(model_for_book, callback)
        print("Training Calamari")
        self.s2s_trainer.run(model_for_book, callback)
        print("Done")


if __name__ == '__main__':
    import random
    import numpy as np
    random.seed(1)
    np.random.seed(1)
    b = DatabaseBook('demo')
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8,
        [LockState(Locks.STAFF_LINES, True),
         LockState(Locks.LAYOUT, True)], True, [b])
    output = 'models_out/test_pcs2s'
    params = SymbolDetectionDatasetParams(
        gt_required=True,
        height=40,
        dewarp=True,
        cut_region=False,
        pad=(0, 10, 0, 20),
        pad_power_of_2=None,
        center=True,
        staff_lines_only=True,
    )
    train_params = SymbolDetectionTrainerParams(
        params,
        train_pcgts,
示例#3
0
            load=None if not self.params.model_to_load() else
            self.params.model_to_load().local_file('model'),
            display=self.params.display,
            output_dir=self.settings.model.path,
            model_name='model',
            early_stopping_max_performance_drops=self.params.
            early_stopping_max_keep,
            threads=self.params.processes,
            data_augmentation=False,
            loss=Loss.CATEGORICAL_CROSSENTROPY,
            monitor=Monitor.VAL_ACCURACY,
        )
        trainer = Trainer(settings)
        trainer.train(callback=pc_callback)


if __name__ == "__main__":
    # Manual smoke test: train a basic staff-line detector on a single
    # fully annotated book.
    from database import DatabaseBook
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState

    demo_book = DatabaseBook('Graduel_Fully_Annotated')
    staff_locks = [LockState(Locks.STAFF_LINES, True)]
    train_data, val_data = dataset_by_locked_pages(
        0.8, staff_locks, datasets=[demo_book])
    settings = AlgorithmTrainerSettings(
        dataset_params=DatasetParams(),
        train_data=train_data,
        validation_data=val_data,
    )
    BasicStaffLinesTrainer(settings).train(demo_book)
示例#4
0
                          )

        """
if __name__ == '__main__':
    import random
    import numpy as np
    random.seed(1)
    np.random.seed(1)

    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ommr4all.settings')
    import django
    django.setup()
    b = DatabaseBook('Pa_14819')

    train_pcgts, val_pcgts = dataset_by_locked_pages(0.9999, [LockState(Locks.LAYOUT, True)], True, [b])
    trainer_params = CalamariTrainer.default_params()
    trainer_params.l_rate = 1e-3
    trainer_params.load = '/home/ls6/wick/Documents/Projects/calamari_models/fraktur_historical_ligs/0.ckpt.json'

    params = DatasetParams(
        gt_required=True,
        height=48,
        cut_region=True,
        pad=[0, 10, 0, 20],
        #lyrics_normalization=LyricsNormalization.ONE_STRING,
    )
    train_params = AlgorithmTrainerSettings(
        params,
        train_pcgts,
        val_pcgts,
示例#5
0
if __name__ == '__main__':
    # Manual smoke test: load a pretrained Calamari OCR model and run
    # prediction on the first page of a demo book.
    from omr.steps.step import Step, AlgorithmTypes
    from ommr4all.settings import BASE_DIR
    import random
    # NOTE(review): cv2, plt and PcGtsCanvas are imported but unused in
    # this block — presumably leftovers from debugging/visualization code;
    # confirm before removing the imports.
    import cv2
    import matplotlib.pyplot as plt
    from shared.pcgtscanvas import PcGtsCanvas
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Seed both RNGs so repeated runs produce identical predictions.
    random.seed(1)
    np.random.seed(1)
    # Removed an unreachable `if False:` branch that built an unused
    # train/validation split via dataset_by_locked_pages.
    book = DatabaseBook('Gothic_Test')
    meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    # model = meta.newest_model_for_book(book)
    model = Model(
        MetaId.from_custom_path(
            BASE_DIR +
            '/internal_storage/pretrained_models/text_calamari/fraktur_historical',
            meta.type()))
    settings = AlgorithmPredictorSettings(model=model)
    pred = meta.create_predictor(settings)
    ps: List[PredictionResult] = list(pred.predict(book.pages()[0:1]))
    def run(self, task: Task, com_queue: Queue) -> dict:
        """Train a model for the selected book, streaming progress updates.

        Trainer events (data resolution, loading, per-iteration progress)
        are forwarded to *com_queue* as ``TaskCommunicationData`` messages
        for *task*. Returns an empty dict when training completes.
        """
        class Callback(TrainerCallback):
            """Bridges trainer events to the task communication queue."""

            def __init__(self):
                super().__init__()
                # -1 marks "not reported yet" for every tracked metric.
                self.iter, self.loss, self.acc, self.best_iter, self.best_acc, self.best_iters = -1, -1, -1, -1, -1, -1

            def resolving_files(self):
                # Ground-truth files are being located.
                com_queue.put(
                    TaskCommunicationData(
                        task,
                        TaskStatus(
                            TaskStatusCodes.RUNNING,
                            TaskProgressCodes.RESOLVING_DATA,
                        )))

            def loading(self, n: int, total: int):
                # Dataset loading progress: n of total files processed.
                com_queue.put(
                    TaskCommunicationData(
                        task,
                        TaskStatus(
                            TaskStatusCodes.RUNNING,
                            TaskProgressCodes.LOADING_DATA,
                            progress=n / total,
                            n_processed=n,
                            n_total=total,
                        )))

            def loading_started(self, total: int):
                pass

            def loading_finished(self, total: int):
                com_queue.put(
                    TaskCommunicationData(
                        task,
                        TaskStatus(
                            TaskStatusCodes.RUNNING,
                            TaskProgressCodes.PREPARING_TRAINING,
                        )))

            def put(self):
                # Publish the current training state.
                # NOTE(review): self.total_iters and self.early_stopping_iters
                # are never assigned in this class — presumably set by the
                # TrainerCallback base class; confirm, else put() raises.
                com_queue.put(
                    TaskCommunicationData(
                        task,
                        TaskStatus(
                            TaskStatusCodes.RUNNING,
                            TaskProgressCodes.WORKING,
                            progress=self.iter / self.total_iters,
                            accuracy=self.best_acc
                            if self.best_acc >= 0 else -1,
                            early_stopping_progress=self.best_iters /
                            self.early_stopping_iters
                            if self.early_stopping_iters > 0 else -1,
                            loss=self.loss,
                        )))

            def next_iteration(self, iter: int, loss: float, acc: float):
                self.iter, self.loss, self.acc = iter, loss, acc
                self.put()

            def next_best_model(self, best_iter: int, best_acc: float,
                                best_iters: int):
                self.best_iter, self.best_acc, self.best_iters = best_iter, best_acc, best_iters
                self.put()

            def early_stopping(self):
                pass

        callback = Callback()

        logger.info("Finding PcGts files with valid ground truth")
        callback.resolving_files()
        # NOTE(review): train_pcgts/val_pcgts below are only used for the
        # log statements — the data actually trained on comes from
        # self.params.to_train_val() further down. Confirm the n<50 merge
        # is intentionally not applied to the real training split.
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            self.params.nTrain, [LockState('Symbols', True)],
            datasets=[self.selection.book]
            if not self.params.includeAllTrainingData else [])
        if len(train_pcgts) + len(val_pcgts) < 50:
            # only very few files, use all for training and evaluate on training as-well
            train_pcgts = train_pcgts + val_pcgts
            val_pcgts = train_pcgts
            logger.info("Combining training and validation files because n<50")

        logger.info(
            "Starting training with {} training and {} validation files".
            format(len(train_pcgts), len(val_pcgts)))
        logger.debug("Training files: {}".format(
            [p.page.location.local_path() for p in train_pcgts]))
        logger.debug("Validation files: {}".format(
            [p.page.location.local_path() for p in val_pcgts]))

        meta = self.algorithm_meta()
        # NOTE(review): this split locks on 'StaffLines' while the logged
        # split above locks on 'Symbols' — verify which lock is intended.
        train, val = self.params.to_train_val(
            locks=[LockState('StaffLines', True)], books=[self.selection.book])

        settings = AlgorithmTrainerSettings(
            train_data=train,
            validation_data=val,
            dataset_params=DatasetParams(
                gt_required=True,
                pad=None,
                pad_power_of_2=3,
                height=80,
                dewarp=False,
                cut_region=False,
                center=True,
                staff_lines_only=True,
            ),
        )

        trainer = meta.create_trainer(settings)
        if self.params.pretrainedModel:
            # Resume from a previously trained model if one was selected.
            trainer.settings.params.load = self.params.pretrainedModel.id
        trainer.train(self.selection.book, callback=callback)
        logger.info("Training finished for book {}".format(
            self.selection.book.local_path()))
        return {}