def to_train_val(self, locks: List[LockState], shuffle: bool = True,
                 books: List[DatabaseBook] = None) -> Tuple[List[PcGts], List[PcGts]]:
    """Resolve the train/validation page split for this parameter set.

    Args:
        locks: lock states a page must satisfy to be included.
        shuffle: whether the pages are shuffled before splitting.
        books: books to draw pages from; ignored when
            ``includeAllTrainingData`` is set, in which case every
            available book is used instead.

    Returns:
        A ``(train, validation)`` pair of PcGts page lists, split
        according to ``self.nTrain``.
    """
    # When all training data is requested, override the caller's book list.
    source_books = DatabaseBook.list_available() if self.includeAllTrainingData else books
    return dataset_by_locked_pages(self.nTrain, locks, shuffle, source_books)
# NOTE(review): this chunk holds two unrelated fragments whose original line
# breaks were lost; they are reconstructed below without changing any code.

# --- Fragment 1: tail of a trainer method (its `def` header is outside this
# view).  Runs the pixel-classifier stage, then the Calamari stage.
        print("Training the pixel classifier")
        self.pc_trainer.run(model_for_book, callback)
        print("Training Calamari")
        self.s2s_trainer.run(model_for_book, callback)
        print("Done")


# --- Fragment 2: manual-run entry point; cut off mid-call at the end of this
# chunk (the remaining SymbolDetectionTrainerParams arguments are not visible).
if __name__ == '__main__':
    import random
    import numpy as np
    # Fixed seeds so manual runs are reproducible.
    random.seed(1)
    np.random.seed(1)
    b = DatabaseBook('demo')
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # 80/20 split over pages whose staff lines AND layout are locked (approved).
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8,
        [LockState(Locks.STAFF_LINES, True),
         LockState(Locks.LAYOUT, True)],
        True, [b])
    output = 'models_out/test_pcs2s'
    params = SymbolDetectionDatasetParams(
        gt_required=True,
        height=40,
        dewarp=True,
        cut_region=False,
        pad=(0, 10, 0, 20),
        pad_power_of_2=None,
        center=True,
        staff_lines_only=True,
    )
    train_params = SymbolDetectionTrainerParams(
        params,
        train_pcgts,
# NOTE(review): two collapsed fragments reconstructed below; no code changed.

# --- Fragment 1: trailing keyword arguments of a trainer-settings call whose
# opening is outside this view, followed by the training invocation.
            load=None if not self.params.model_to_load() else self.params.model_to_load().local_file('model'),
            display=self.params.display,
            output_dir=self.settings.model.path,
            model_name='model',
            early_stopping_max_performance_drops=self.params.early_stopping_max_keep,
            threads=self.params.processes,
            data_augmentation=False,
            loss=Loss.CATEGORICAL_CROSSENTROPY,
            monitor=Monitor.VAL_ACCURACY,
        )
        trainer = Trainer(settings)
        trainer.train(callback=pc_callback)


# --- Fragment 2: manual-run entry point for the basic staff-line trainer.
if __name__ == "__main__":
    from database import DatabaseBook
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    book = DatabaseBook('Graduel_Fully_Annotated')
    # 80/20 split over pages whose staff lines are locked (approved).
    train, val = dataset_by_locked_pages(
        0.8, [LockState(Locks.STAFF_LINES, True)], datasets=[book])
    trainer = BasicStaffLinesTrainer(
        AlgorithmTrainerSettings(
            dataset_params=DatasetParams(),
            train_data=train,
            validation_data=val,
        ))
    trainer.train(book)
# NOTE(review): collapsed fragment reconstructed below; no code changed.
# The leading `)` and `"""` close a call and a triple-quoted (apparently
# commented-out) block that both begin outside this view.
    )
"""

# --- Manual-run entry point for the Calamari text trainer; cut off mid-call
# at the end of this chunk (remaining AlgorithmTrainerSettings arguments are
# not visible).
if __name__ == '__main__':
    import random
    import numpy as np
    # Fixed seeds so manual runs are reproducible.
    random.seed(1)
    np.random.seed(1)
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Django must be configured before the database layer is imported/used.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ommr4all.settings')
    import django
    django.setup()
    b = DatabaseBook('Pa_14819')
    # Nearly all pages go to training (ratio 0.9999); only layout lock required.
    train_pcgts, val_pcgts = dataset_by_locked_pages(0.9999, [LockState(Locks.LAYOUT, True)], True, [b])
    trainer_params = CalamariTrainer.default_params()
    trainer_params.l_rate = 1e-3
    # NOTE(review): hard-coded absolute path to a pretrained Calamari model —
    # only valid on the original author's machine; should be configurable.
    trainer_params.load = '/home/ls6/wick/Documents/Projects/calamari_models/fraktur_historical_ligs/0.ckpt.json'
    params = DatasetParams(
        gt_required=True,
        height=48,
        cut_region=True,
        pad=[0, 10, 0, 20],
        #lyrics_normalization=LyricsNormalization.ONE_STRING,
    )
    train_params = AlgorithmTrainerSettings(
        params,
        train_pcgts,
        val_pcgts,
# NOTE(review): collapsed fragment reconstructed below; no code changed.
# Manual-run demo: predict OCR text on the first page of a test book using a
# pretrained Calamari model; may continue past the end of this chunk.
if __name__ == '__main__':
    from omr.steps.step import Step, AlgorithmTypes
    from ommr4all.settings import BASE_DIR
    import random
    import cv2
    import matplotlib.pyplot as plt
    from shared.pcgtscanvas import PcGtsCanvas
    from omr.dataset.datafiles import dataset_by_locked_pages, LockState
    # Fixed seeds so manual runs are reproducible.
    random.seed(1)
    np.random.seed(1)
    if False:  # dead code: split disabled and its book list fully commented out
        train_pcgts, val_pcgts = dataset_by_locked_pages(
            0.8,
            [LockState(Locks.SYMBOLS, True),
             LockState(Locks.LAYOUT, True)], True, [
                # DatabaseBook('Graduel_Part_1'),
                # DatabaseBook('Graduel_Part_2'),
                # DatabaseBook('Graduel_Part_3'),
            ])
    book = DatabaseBook('Gothic_Test')
    meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    # model = meta.newest_model_for_book(book)
    # Load a pretrained model from internal storage instead of a book model.
    model = Model(
        MetaId.from_custom_path(
            BASE_DIR + '/internal_storage/pretrained_models/text_calamari/fraktur_historical',
            meta.type()))
    settings = AlgorithmPredictorSettings(model=model, )
    pred = meta.create_predictor(settings)
    # Predict only the first page of the book.
    ps: List[PredictionResult] = list(pred.predict(book.pages()[0:1]))
def run(self, task: Task, com_queue: Queue) -> dict:
    """Resolve ground-truth pages and train a model for the selected book.

    Progress is streamed to the web frontend as ``TaskCommunicationData``
    messages pushed onto ``com_queue``.

    Args:
        task: the task whose status updates are being reported.
        com_queue: queue that carries status messages to the task consumer.

    Returns:
        An empty dict (no result payload; the trained model is a side effect).
    """

    class Callback(TrainerCallback):
        # Forwards trainer progress events onto the task communication queue.

        def __init__(self):
            super().__init__()
            # -1 means "not yet known" for every bookkeeping value.
            self.iter, self.loss, self.acc, self.best_iter, self.best_acc, self.best_iters = -1, -1, -1, -1, -1, -1

        def resolving_files(self):
            com_queue.put(
                TaskCommunicationData(
                    task,
                    TaskStatus(
                        TaskStatusCodes.RUNNING,
                        TaskProgressCodes.RESOLVING_DATA,
                    )))

        def loading(self, n: int, total: int):
            # Dataset loading progress (n of total files loaded).
            com_queue.put(
                TaskCommunicationData(
                    task,
                    TaskStatus(
                        TaskStatusCodes.RUNNING,
                        TaskProgressCodes.LOADING_DATA,
                        progress=n / total,
                        n_processed=n,
                        n_total=total,
                    )))

        def loading_started(self, total: int):
            pass

        def loading_finished(self, total: int):
            com_queue.put(
                TaskCommunicationData(
                    task,
                    TaskStatus(
                        TaskStatusCodes.RUNNING,
                        TaskProgressCodes.PREPARING_TRAINING,
                    )))

        def put(self):
            # Push the current training state.  self.total_iters and
            # self.early_stopping_iters are presumably set by the
            # TrainerCallback base class — TODO confirm.
            com_queue.put(
                TaskCommunicationData(
                    task,
                    TaskStatus(
                        TaskStatusCodes.RUNNING,
                        TaskProgressCodes.WORKING,
                        progress=self.iter / self.total_iters,
                        accuracy=self.best_acc if self.best_acc >= 0 else -1,
                        early_stopping_progress=self.best_iters / self.early_stopping_iters if self.early_stopping_iters > 0 else -1,
                        loss=self.loss,
                    )))

        def next_iteration(self, iter: int, loss: float, acc: float):
            self.iter, self.loss, self.acc = iter, loss, acc
            self.put()

        def next_best_model(self, best_iter: int, best_acc: float, best_iters: int):
            self.best_iter, self.best_acc, self.best_iters = best_iter, best_acc, best_iters
            self.put()

        def early_stopping(self):
            pass

    callback = Callback()

    logger.info("Finding PcGts files with valid ground truth")
    callback.resolving_files()
    # Pages whose symbols are locked (approved) qualify as ground truth;
    # restrict to the selected book unless all training data was requested.
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        self.params.nTrain, [LockState('Symbols', True)],
        datasets=[self.selection.book] if not self.params.includeAllTrainingData else [])
    if len(train_pcgts) + len(val_pcgts) < 50:
        # only very few files, use all for training and evaluate on training as-well
        train_pcgts = train_pcgts + val_pcgts
        val_pcgts = train_pcgts
        logger.info("Combining training and validation files because n<50")
    logger.info(
        "Starting training with {} training and {} validation files".
        format(len(train_pcgts), len(val_pcgts)))
    logger.debug("Training files: {}".format(
        [p.page.location.local_path() for p in train_pcgts]))
    logger.debug("Validation files: {}".format(
        [p.page.location.local_path() for p in val_pcgts]))

    meta = self.algorithm_meta()

    # NOTE(review): the split is recomputed here via to_train_val with a
    # *StaffLines* lock, shadowing the *Symbols* split resolved above, which
    # is then only logged — confirm which split is actually intended.
    train, val = self.params.to_train_val(
        locks=[LockState('StaffLines', True)],
        books=[self.selection.book])
    settings = AlgorithmTrainerSettings(
        train_data=train,
        validation_data=val,
        dataset_params=DatasetParams(
            gt_required=True,
            pad=None,
            pad_power_of_2=3,
            height=80,
            dewarp=False,
            cut_region=False,
            center=True,
            staff_lines_only=True,
        ),
    )

    trainer = meta.create_trainer(settings)
    if self.params.pretrainedModel:
        # Warm-start from the user-selected pretrained model.
        trainer.settings.params.load = self.params.pretrainedModel.id
    trainer.train(self.selection.book, callback=callback)

    logger.info("Training finished for book {}".format(
        self.selection.book.local_path()))
    return {}