Exemplo n.º 1
0
 def test_model_zoo(self):
     """Evaluate the ``antiqua_modern`` checkpoints of the calamari_models archive.

     Unless a ``calamari_models`` directory already exists, release
     ``version`` of the archive is downloaded and unpacked through a shell
     pipeline (needs network access and ``sh``, ``wget``, ``tar``, ``mv``).
     All ``*.ckpt.json`` files in ``calamari_models/antiqua_modern`` are then
     passed to ``predict_and_eval_main`` together with the data returned by
     ``uw3_trainer_params(with_validation=True).gen.val_gen()``, and the voted
     ``avg_ler`` is asserted to be below 0.001.

     The archive lives in ``model_archive_permanent`` (relative to the working
     directory at call time) and is reused by later runs.
     """
     version = '1.0'
     url = f"https://github.com/Calamari-OCR/calamari_models/archive/{version}.tar.gz"
     # A persistent directory is used on purpose (download cache / debugging);
     # the temporary directory formerly created here was never used.
     archive_dir = 'model_archive_permanent'
     os.makedirs(archive_dir, exist_ok=True)
     # The relative paths below rely on the working directory, so switch into
     # the archive directory but always switch back: otherwise the changed
     # cwd leaks into every test that runs afterwards.
     previous_cwd = os.getcwd()
     os.chdir(archive_dir)
     try:
         if not os.path.exists('calamari_models'):
             # Download, unpack and rename in one shell command; check_call
             # raises if the command exits with a non-zero status.
             check_call([
                 'sh', '-c', ' '.join([
                     'wget', '-q', '-O', '-', url, '|', 'tar', 'xz', '&&',
                     'mv', f'calamari_models-{version}', 'calamari_models'
                 ])
             ])
         trainer_params = uw3_trainer_params(with_validation=True)
         args = PredictAndEvalArgs(
             checkpoint=glob(
                 os.path.join('calamari_models', 'antiqua_modern',
                              '*.ckpt.json')),
             predictor=PredictorParams(pipeline=DataPipelineParams(
                 batch_size=5)),
             data=trainer_params.gen.val_gen(),
         )
         full_evaluation = predict_and_eval_main(args)
     finally:
         os.chdir(previous_cwd)
     # avg_ler is an error rate, so the message talks about the error rate
     # rather than the accuracy.
     self.assertLess(
         full_evaluation['voted']['eval']['avg_ler'], 0.001,
         "The error rate (avg_ler) on the test data must be below 0.1%")
Exemplo n.º 2
0
 def test_predict_and_eval_uw3_with_voting(self):
     """Run ``main`` with the same checkpoint listed three times.

     The checkpoint ``models/best.ckpt`` below ``this_dir`` is passed three
     times in the checkpoint list, together with a pipeline batch size of 5
     and the data returned by
     ``uw3_trainer_params(with_validation=True).gen.val_gen()``.
     Nothing is asserted on the result; the test passes when ``main``
     returns without raising.
     """
     from calamari_ocr.test.test_train_file import uw3_trainer_params

     model_path = os.path.join(this_dir, "models", "best.ckpt")
     params = uw3_trainer_params(with_validation=True)
     pipeline = DataPipelineParams(batch_size=5)
     predictor = PredictorParams(pipeline=pipeline)
     main(
         PredictAndEvalArgs(
             checkpoint=[model_path] * 3,
             predictor=predictor,
             data=params.gen.val_gen(),
         )
     )
Exemplo n.º 3
0
 def test_predict_and_eval_hdf5(self):
     """Run ``main`` with a single checkpoint on the HDF5 test parameters.

     Uses ``models/best.ckpt`` below ``this_dir``, a data pipeline with
     ``num_processes=1`` and the data returned by
     ``default_trainer_params(with_validation=True).gen.val_gen()``.
     Nothing is asserted on the result; the test passes when ``main``
     returns without raising.
     """
     from calamari_ocr.test.test_train_hdf5 import default_trainer_params

     model_path = os.path.join(this_dir, "models", "best.ckpt")
     params = default_trainer_params(with_validation=True)
     pipeline = DataPipelineParams(num_processes=1)
     predictor = PredictorParams(pipeline=pipeline)
     main(
         PredictAndEvalArgs(
             checkpoint=[model_path],
             predictor=predictor,
             data=params.gen.val_gen(),
         )
     )
Exemplo n.º 4
0
 def test_model_zoo(self):
     """Evaluate the ``antiqua_modern`` checkpoints of the calamari_models archive.

     Unless a ``calamari_models`` directory already exists, release
     ``version`` of the archive is downloaded and unpacked through a shell
     pipeline (needs network access and ``sh``, ``wget``, ``tar``, ``mv``).
     All ``*.ckpt.json`` files in ``calamari_models/antiqua_modern`` are then
     passed to ``predict_and_eval_main`` together with the data returned by
     ``uw3_trainer_params(with_validation=True).gen.val_gen()``, and the voted
     ``avg_ler`` is asserted to be below 0.001.

     The archive lives in ``model_archive_permanent`` (relative to the working
     directory at call time) and is reused by later runs.
     """
     version = "1.0"
     url = f"https://github.com/Calamari-OCR/calamari_models/archive/{version}.tar.gz"
     # A persistent directory is used on purpose (download cache / debugging);
     # the temporary directory formerly created here was never used.
     archive_dir = "model_archive_permanent"
     os.makedirs(archive_dir, exist_ok=True)
     # The relative paths below rely on the working directory, so switch into
     # the archive directory but always switch back: otherwise the changed
     # cwd leaks into every test that runs afterwards.
     previous_cwd = os.getcwd()
     os.chdir(archive_dir)
     try:
         if not os.path.exists("calamari_models"):
             # Download, unpack and rename in one shell command; check_call
             # raises if the command exits with a non-zero status.
             fetch_command = " ".join(
                 [
                     "wget",
                     "-q",
                     "-O",
                     "-",
                     url,
                     "|",
                     "tar",
                     "xz",
                     "&&",
                     "mv",
                     f"calamari_models-{version}",
                     "calamari_models",
                 ]
             )
             check_call(["sh", "-c", fetch_command])
         trainer_params = uw3_trainer_params(with_validation=True)
         args = PredictAndEvalArgs(
             checkpoint=glob(os.path.join("calamari_models", "antiqua_modern", "*.ckpt.json")),
             predictor=PredictorParams(pipeline=DataPipelineParams(batch_size=5)),
             data=trainer_params.gen.val_gen(),
         )
         full_evaluation = predict_and_eval_main(args)
     finally:
         os.chdir(previous_cwd)
     # avg_ler is an error rate, so the message talks about the error rate
     # rather than the accuracy.
     self.assertLess(
         full_evaluation["voted"]["eval"]["avg_ler"],
         0.001,
         "The error rate (avg_ler) on the test data must be below 0.1%",
     )