def run(self,
        n_train: int,
        val_amount: float,
        cross_folds: int,
        single_folds: List[int],
        train_books: List[str],
        test_books: List[str],
        train_books_extend: List[str]
        ):
    """Generate the datasets, run one experiment per dataset and print the results.

    All parameters are forwarded unchanged, by keyword, to ``generate_dataset``.
    Every dataset it yields is wrapped in a ``SingleDataArgs`` whose model
    directory is ``<global_args.model_dir>/<algorithm type value>_<fold>``.
    The experimenter class of the configured algorithm type is instantiated
    once per ``SingleDataArgs`` and its ``run_single()`` result is collected;
    the collected results are finally handed to ``print_results``.
    """
    # Function-scope import, exactly as in the original code.
    from omr.steps.step import Step

    global_args = self.global_args
    logger.info("Finding PcGts files with valid ground truth")

    # Lock states handed to the dataset generator.
    required_locks = [
        LockState(Locks.STAFF_LINES, True),
        LockState(Locks.LAYOUT, True),
    ]
    generated = generate_dataset(
        lock_states=required_locks,
        n_train=n_train,
        val_amount=val_amount,
        cross_folds=cross_folds,
        single_folds=single_folds,
        train_books=train_books,
        test_books=test_books,
        train_books_extend=train_books_extend,
    )

    # One argument bundle per generated dataset.
    train_args = []
    for gd in generated:
        fold_id = gd.fold
        fold_model_dir = os.path.join(
            global_args.model_dir,
            '{}_{}'.format(global_args.algorithm_type.value, gd.fold),
        )
        train_args.append(SingleDataArgs(
            fold_id,
            fold_model_dir,
            gd.train_pcgts_files,
            gd.validation_pcgts_files,
            gd.test_pcgts_files,
            global_args,
        ))

    experimenter_class = Step.meta(self.global_args.algorithm_type).experimenter()

    # Run the experiments one after another, keeping the results in order.
    results = []
    for args in train_args:
        experimenter = experimenter_class(args, logger)
        results.append(experimenter.run_single())

    experimenter_class.print_results(self.global_args, results, logger)
def put(self, request, group, style):
    """Copy the model named in the request body over a local default model.

    The request body is parsed as a ``ModelMeta``; the model with that id is
    the copy source.  The copy target is built from
    ``DatabaseAvailableModels.local_default_models(style, default_type)`` and
    the model directory of ``default_type``, where ``default_type`` is the
    first algorithm type of ``group``.  The copy is made with
    ``override=True`` and an empty ``Response`` is returned.
    """
    requested_meta = ModelMeta.from_json(request.body)
    default_type = AlgorithmGroups(group).types()[0]
    source_model = Model.from_id_str(requested_meta.id)

    # Assemble the id of the default-model slot that is to be overwritten.
    default_location = DatabaseAvailableModels.local_default_models(style, default_type)
    default_model_dir = Step.meta(default_type).model_dir()
    destination_model = Model(MetaId(default_location, default_model_dir))

    source_model.copy_to(destination_model, override=True)
    return Response()
def __init__(self, settings: AlgorithmPredictorSettings):
    """Initialise the base class and create the internal Calamari OCR predictor.

    ``settings`` is passed to the base-class initialiser only.  The OCR
    predictor is created from separate settings that point at the model
    stored under ``internal_storage/default_models/fraktur/text_calamari/``
    below ``BASE_DIR``, with the CTC decoder type set to ``CTC_DEFAULT``.
    """
    super().__init__(settings)

    ocr_meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    from ommr4all.settings import BASE_DIR

    default_model_path = BASE_DIR + '/internal_storage/default_models/fraktur/text_calamari/'
    default_model_id = MetaId.from_custom_path(default_model_path, ocr_meta.type())
    default_model = Model(default_model_id)

    # Dedicated settings for the OCR predictor; the caller's settings are not reused here.
    ocr_settings = AlgorithmPredictorSettings(model=default_model)
    ocr_settings.params.ctcDecoder.params.type = CTCDecoderParams.CTC_DEFAULT

    self.ocr_predictor = ocr_meta.create_predictor(ocr_settings)
def __init__(self, settings: AlgorithmPredictorSettings):
    """Initialise the base class, store the document parameters and create the OCR predictor.

    ``documentId`` and ``documentText`` are read from ``settings.params``.
    A ``SimilarDocumentChecker`` and a word-level
    ``LyricsNormalizationProcessor`` are created as helpers.  The OCR
    predictor is created from separate settings that point at the model
    stored under ``internal_storage/default_models/fraktur/text_calamari/``
    below ``BASE_DIR``, with the CTC decoder type set to ``CTC_DEFAULT``.
    """
    super().__init__(settings)

    # Document the prediction refers to, as supplied by the caller.
    self.document_id = settings.params.documentId
    self.document_text = settings.params.documentText

    self.document_similar_tester = SimilarDocumentChecker()
    normalization_params = LyricsNormalizationParams(LyricsNormalization.WORDS)
    self.text_normalizer = LyricsNormalizationProcessor(normalization_params)

    ocr_meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)
    from ommr4all.settings import BASE_DIR

    default_model_path = BASE_DIR + '/internal_storage/default_models/fraktur/text_calamari/'
    default_model_id = MetaId.from_custom_path(default_model_path, ocr_meta.type())
    default_model = Model(default_model_id)

    # Dedicated settings for the OCR predictor; the caller's settings are not reused here.
    ocr_settings = AlgorithmPredictorSettings(model=default_model)
    ocr_settings.params.ctcDecoder.params.type = CTCDecoderParams.CTC_DEFAULT

    self.ocr_predictor = ocr_meta.create_predictor(ocr_settings)
from omr.dataset.datafiles import dataset_by_locked_pages, LockState

# Seed both random number generators with a fixed value.
random.seed(1)
np.random.seed(1)

# Disabled call, kept for reference; it never executes.
if False:
    lock_states = [LockState(Locks.SYMBOLS, True), LockState(Locks.LAYOUT, True)]
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8,
        lock_states,
        True,
        [
            # DatabaseBook('Graduel_Part_1'),
            # DatabaseBook('Graduel_Part_2'),
            # DatabaseBook('Graduel_Part_3'),
        ],
    )

book = DatabaseBook('Gothic_Test')
meta = Step.meta(AlgorithmTypes.OCR_CALAMARI)

# Alternative model selection, currently unused:
# model = meta.newest_model_for_book(book)
model_path = BASE_DIR + '/internal_storage/pretrained_models/text_calamari/fraktur_historical'
model = Model(MetaId.from_custom_path(model_path, meta.type()))

settings = AlgorithmPredictorSettings(model=model)
pred = meta.create_predictor(settings)

# Predict on at most the first page of the book.
first_pages = book.pages()[:1]
ps: List[PredictionResult] = list(pred.predict(first_pages))

for i, p in enumerate(ps):
    page = p.pcgts.page
    # The canvas scale is taken from the first predicted text line.
    scale_reference = p.text_lines[0].line.operation.scale_reference
    canvas = PcGtsCanvas(page, scale_reference)
    for j, s in enumerate(p.text_lines):
        canvas.draw(s)
from shared.pcgtscanvas import PcGtsCanvas
from omr.dataset.datafiles import dataset_by_locked_pages, LockState

# Seed both random number generators with a fixed value.
random.seed(1)
np.random.seed(1)

# Disabled call, kept for reference; it never executes.
if False:
    lock_states = [LockState(Locks.SYMBOLS, True), LockState(Locks.LAYOUT, True)]
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8,
        lock_states,
        True,
        [
            # DatabaseBook('Graduel_Part_1'),
            # DatabaseBook('Graduel_Part_2'),
            # DatabaseBook('Graduel_Part_3'),
        ],
    )

book = DatabaseBook('Paper_New_York')
meta = Step.meta(AlgorithmTypes.SYLLABLES_FROM_TEXT)
model = meta.best_model_for_book(book)

settings = AlgorithmPredictorSettings(model=model)
pred = meta.create_predictor(settings)

# Predict on at most the first page of the book.
first_pages = book.pages()[:1]
ps: List[PredictionResult] = list(pred.predict(first_pages))

for i, p in enumerate(ps):
    pmr = p.page_match_result
    canvas = PcGtsCanvas(pmr.pcgts.page, PageScaleReference.NORMALIZED_X2)

    # Only the text line at index 4 of the text prediction is drawn.
    highlighted_line = pmr.text_prediction_result.text_lines[4]
    canvas.draw(highlighted_line, color=(25, 150, 25), background=True)

    # Further drawing calls, currently disabled:
    # canvas.draw(pmr.match_results)
    # canvas.draw(p.annotations)
    canvas.show()
# Django is configured before any of the imports below run; keep this order.
os.environ['DJANGO_SETTINGS_MODULE'] = 'ommr4all.settings'
django.setup()

from ommr4all.settings import BASE_DIR
import random
import matplotlib.pyplot as plt
from shared.pcgtscanvas import PcGtsCanvas
from omr.dataset.datafiles import dataset_by_locked_pages, LockState
from database.file_formats.pcgts import PageScaleReference

# Seed both random number generators with a fixed value.
random.seed(1)
np.random.seed(1)

# Disabled call, kept for reference; it never executes.
if False:
    lock_states = [LockState(Locks.SYMBOLS, True), LockState(Locks.LAYOUT, True)]
    train_pcgts, val_pcgts = dataset_by_locked_pages(
        0.8,
        lock_states,
        True,
        [
            # DatabaseBook('Graduel_Part_1'),
            # DatabaseBook('Graduel_Part_2'),
            # DatabaseBook('Graduel_Part_3'),
        ],
    )

book = DatabaseBook('Paper_New_York')
meta = Step.meta(AlgorithmTypes.SYLLABLES_IN_ORDER)
model = meta.best_model_for_book(book)

settings = AlgorithmPredictorSettings(model=model)
pred = meta.create_predictor(settings)

# Predict on at most the first page of the book.
first_pages = book.pages()[:1]
ps: List[PredictionResult] = list(pred.predict(first_pages))

for i, p in enumerate(ps):
    pmr = p.page_match_result
    canvas = PcGtsCanvas(pmr.pcgts.page, PageScaleReference.NORMALIZED_X2)

    # Draw the match results first, then the annotations, then display the canvas.
    canvas.draw(pmr.match_results)
    canvas.draw(p.annotations)
    canvas.show()
def get(self, request, group, style):
    """Return the models available for ``style`` as a ``Response``.

    The lookup uses the first algorithm type of ``group``; the listing
    returned by ``list_available_models_for_style(style)`` is converted with
    ``to_dict()`` and wrapped in the response.
    """
    default_type = AlgorithmGroups(group).types()[0]
    step_meta = Step.meta(default_type)
    available_models = step_meta.list_available_models_for_style(style)
    return Response(available_models.to_dict())