示例#1
0
def ocrd_cli_wrap_processor(processorClass,
                            ocrd_tool=None,
                            mets=None,
                            working_dir=None,
                            dump_json=False,
                            version=False,
                            **kwargs):
    """Dispatch a processor command-line invocation.

    The options are checked in this order and only the first match runs:

    * ``dump_json``: instantiate ``processorClass`` with ``dump_json=True``.
    * ``version``: instantiate ``processorClass`` and print its ``version``
      attribute next to ``OCRD_VERSION``.
    * otherwise: build a workspace from ``mets`` and ``working_dir`` and
      call ``run_processor``, forwarding ``ocrd_tool`` and ``kwargs``.

    A ``mets`` value without ``://`` is treated as a local path and turned
    into an absolute ``file://`` URL.

    Raises:
        Exception: if ``mets`` is ``None`` in the processing case, or if it
            is a ``file://`` URL whose path does not exist.
    """
    if dump_json:
        processorClass(workspace=None, dump_json=True)
        return
    if version:
        instance = processorClass(workspace=None)
        print("Version %s, ocrd/core %s" % (instance.version, OCRD_VERSION))
        return
    if mets is None:
        raise Exception('Error: Missing option "-m" / "--mets".')

    file_scheme = 'file://'
    if '://' not in mets:
        # Plain filesystem path: normalise to an absolute file URL.
        mets = file_scheme + os.path.abspath(mets)
    if mets.startswith(file_scheme):
        local_path = mets[len(file_scheme):]
        if not os.path.exists(local_path):
            raise Exception("File does not exist: %s" % mets)

    workspace = Resolver().workspace_from_url(mets, working_dir)
    run_processor(processorClass, ocrd_tool, mets, workspace=workspace, **kwargs)
示例#2
0
 def test_run_agent(self):
     """Running a processor must add exactly one entry to ``mets.agents``."""
     agents_before = len(self.workspace.mets.agents)
     run_processor(DummyProcessor, ocrd_tool=DUMMY_TOOL, workspace=self.workspace)
     agents_after = len(self.workspace.mets.agents)
     self.assertEqual(agents_after, agents_before + 1, 'one more agent')
示例#3
0
 def test_copies_ok(self):
     """Chain two ``DummyProcessor`` runs and check the files each registers.

     The first run reads ``OCR-D-IMG`` and writes ``OUTPUT``; the second
     reads ``OUTPUT`` and writes ``OUTPUT2``.
     """
     def _url(mets_file):
         return mets_file.url

     with copy_of_directory(assets.url_of('SBB0000F29300010000/data')) as wsdir:
         workspace = Workspace(Resolver(), wsdir)

         # Starting point: three input files, empty output group.
         self.assertEqual(len(workspace.mets.find_files(fileGrp='OCR-D-IMG')), 3)
         self.assertEqual(len(workspace.mets.find_files(fileGrp='OUTPUT')), 0)

         # First pass: OCR-D-IMG -> OUTPUT
         run_processor(DummyProcessor,
                       input_file_grp='OCR-D-IMG',
                       output_file_grp='OUTPUT',
                       workspace=workspace)
         first_pass = sorted(workspace.mets.find_files(fileGrp='OUTPUT'), key=_url)
         print([str(s) for s in first_pass])

         # Sorted by URL, the image comes first, followed by its PAGE file.
         self.assertEqual(first_pass[0].url, 'OUTPUT/OUTPUT_0001.tif')
         self.assertEqual(first_pass[1].url, 'OUTPUT/OUTPUT_0001.xml')
         self.assertEqual(page_from_file(first_pass[1]).pcGtsId, first_pass[1].ID)
         self.assertEqual(
             page_from_file(first_pass[1]).get_Page().imageFilename,
             first_pass[0].url)
         self.assertEqual(len(first_pass), 6)

         # The same files must be reachable via ID regex and mimetype.
         self.assertEqual(len(workspace.mets.find_files(ID='//OUTPUT.*')), 6)
         self.assertEqual(len(workspace.mets.find_files(ID='//OUTPUT.*_PAGE')), 3)
         page_outputs = workspace.mets.find_files(fileGrp='OUTPUT', mimetype=MIMETYPE_PAGE)
         self.assertEqual(len(page_outputs), 3)

         # Second pass: OUTPUT -> OUTPUT2
         run_processor(DummyProcessor,
                       input_file_grp='OUTPUT',
                       output_file_grp='OUTPUT2',
                       workspace=workspace)
         second_pass = sorted(workspace.mets.find_files(fileGrp='OUTPUT2'), key=_url)
         self.assertEqual(len(second_pass), 3)
示例#4
0
 def test_crop(self):
     """Run ``OcrdAnybaseocrCropper`` on ``BIN`` and expect one more PAGE file.

     "PAGE file" here means a METS file entry whose mimetype is
     ``MIMETYPE_PAGE``.
     """
     with copy_of_directory(assets.path_to('dfki-testdata/data')) as wsdir:
         ws = Workspace(self.resolver, wsdir)

         def count_page_files():
             # Looks up ``ws.mets`` on each call, so it sees the reloaded METS.
             return len(ws.mets.find_files(mimetype=MIMETYPE_PAGE))

         count_before = count_page_files()
         crop_options = {
             'resolver': self.resolver,
             'mets_url': str(Path(wsdir, 'mets.xml')),
             'input_file_grp': 'BIN',
             'output_file_grp': 'CROP-TEST',
             'parameter': {},
         }
         run_processor(OcrdAnybaseocrCropper, **crop_options)

         # The processor was given a METS URL rather than ``ws``, so re-read it.
         ws.reload_mets()
         self.assertEqual(count_page_files(), count_before + 1)
 def test_crop(self):
     """Run ``OcrdAnybaseocrDewarper`` on ``BIN`` and expect one more PAGE file.

     The test is skipped when ``torch.cuda.is_available()`` is false.
     """
     # NOTE(review): despite its name this test exercises the dewarper,
     # not the cropper.
     if not torch.cuda.is_available():
         pytest.skip('CUDA is not available, cannot test dewarping')

     with copy_of_directory(assets.path_to('dfki-testdata/data')) as wsdir:
         ws = Workspace(self.resolver, wsdir)

         def count_page_files():
             # Looks up ``ws.mets`` on each call, so it sees the reloaded METS.
             return len(ws.mets.find_files(mimetype=MIMETYPE_PAGE))

         count_before = count_page_files()
         dewarp_options = {
             'resolver': self.resolver,
             'mets_url': str(Path(wsdir, 'mets.xml')),
             'input_file_grp': 'BIN',
             'output_file_grp': 'DEWARP-TEST',
             'parameter': {'model_path': str(self.model_path)},
         }
         run_processor(OcrdAnybaseocrDewarper, **dewarp_options)

         # The processor was given a METS URL rather than ``ws``, so re-read it.
         ws.reload_mets()
         self.assertEqual(count_page_files(), count_before + 1)
示例#6
0
 def test_with_mets_url_input_files(self):
     """A processor built from a METS URL exposes 20 input files, all PAGE."""
     mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')
     processor = run_processor(DummyProcessor,
                               resolver=self.resolver,
                               mets_url=mets_url)
     self.assertEqual(len(processor.input_files), 20)
     mimetypes = [input_file.mimetype for input_file in processor.input_files]
     self.assertTrue(all(mimetype == MIMETYPE_PAGE for mimetype in mimetypes))
示例#7
0
 def test_no_input_file_grp(self):
     """Reading ``input_files`` must raise when no ``input_file_grp`` was passed."""
     mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')
     processor = run_processor(DummyProcessor,
                               resolver=self.resolver,
                               mets_url=mets_url)
     expected_message = 'Processor is missing input fileGrp'
     with self.assertRaisesRegex(Exception, expected_message):
         # The attribute access itself is what is expected to raise.
         _ = processor.input_files
示例#8
0
def ocrd_cli_wrap_processor(processorClass,
                            ocrd_tool=None,
                            mets=None,
                            working_dir=None,
                            cache_enabled=True,
                            *args,
                            **kwargs):
    """Resolve ``mets`` to a workspace and run ``processorClass`` on it.

    Args:
        processorClass: processor class handed to ``run_processor``.
        ocrd_tool: forwarded to ``run_processor`` as second positional
            argument.
        mets: URL of the METS file, or a local filesystem path. A value
            without ``://`` is treated as a path and converted to an
            absolute ``file://`` URL.
        working_dir: passed to ``Resolver.workspace_from_url``.
        cache_enabled: passed to the ``Resolver`` constructor.
        *args, **kwargs: forwarded unchanged to ``run_processor``.

    Raises:
        Exception: if ``mets`` is ``None``, or if it is a ``file://`` URL
            whose path does not exist.
    """
    # ``mets`` defaults to None; fail with a clear message instead of an
    # AttributeError from calling a str method on None.
    if mets is None:
        raise Exception('Error: Missing option "-m" / "--mets".')

    file_scheme = 'file://'
    if '://' not in mets:
        # Make the path absolute first: "file://" + "dir/mets.xml" would put
        # "dir" in the authority position of the URL.
        mets = file_scheme + os.path.abspath(mets)
    if mets.startswith(file_scheme) and not os.path.exists(
            mets[len(file_scheme):]):
        raise Exception("File does not exist: %s" % mets)

    resolver = Resolver(cache_enabled=cache_enabled)
    workspace = resolver.workspace_from_url(mets, working_dir)
    run_processor(processorClass,
                  ocrd_tool,
                  mets,
                  *args,
                  workspace=workspace,
                  **kwargs)
示例#9
0
 def test_run_cli(self):
     """Call ``run_cli`` with a full option set, then with only the required ones."""
     with TemporaryDirectory() as tempdir:
         run_processor(DummyProcessor,
                       ocrd_tool=DUMMY_TOOL,
                       workspace=self.workspace)

         mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')

         # Every keyword option passed explicitly.
         full_options = {
             'mets_url': mets_url,
             'resolver': Resolver(),
             'workspace': None,
             'page_id': 'page1',
             'log_level': 'DEBUG',
             'input_file_grp': 'INPUT',
             'output_file_grp': 'OUTPUT',
             'parameter': '/path/to/param.json',
             'working_dir': tempdir,
         }
         run_cli('echo', **full_options)

         # Only METS URL and resolver.
         run_cli('echo', mets_url=mets_url, resolver=Resolver())
示例#10
0
 def test_parameter_url(self):
     """Pass ``parameter`` as a ``file://`` URL that points to an empty JSON object."""
     with TemporaryDirectory() as tempdir:
         jsonpath = join(tempdir, 'params.json')
         with open(jsonpath, 'w') as params_file:
             params_file.write('{}')

         parameter_url = 'file://%s' % jsonpath
         mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')
         processor = run_processor(DummyProcessor,
                                   parameter=parameter_url,
                                   resolver=self.resolver,
                                   mets_url=mets_url)
         self.assertEqual(len(processor.input_files), 35)
示例#11
0
 def test_parameter(self):
     """Pass ``parameter`` as a dict loaded from a JSON file written by the test."""
     with TemporaryDirectory() as tempdir:
         jsonpath = join(tempdir, 'params.json')
         with open(jsonpath, 'w') as params_out:
             params_out.write('{"baz": "quux"}')

         # Read the file back so the processor receives parsed JSON.
         with open(jsonpath, 'r') as params_in:
             parameter = json.load(params_in)

         mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')
         processor = run_processor(DummyProcessor,
                                   parameter=parameter,
                                   resolver=self.resolver,
                                   mets_url=mets_url)
     self.assertEqual(len(processor.input_files), 20)
示例#12
0
 def test_no_mets_url(self):
     """``run_processor`` with a resolver but no ``mets_url`` must raise."""
     expected_message = 'pass mets_url to create a workspace'
     with self.assertRaisesRegex(Exception, expected_message):
         run_processor(DummyProcessor, resolver=self.resolver)
示例#13
0
 def test_no_resolver(self):
     """``run_processor`` with neither workspace nor resolver must raise."""
     expected_message = 'pass a resolver to create a workspace'
     with self.assertRaisesRegex(Exception, expected_message):
         run_processor(DummyProcessor)
示例#14
0
 def test_with_mets_url_input_files(self):
     """A processor built from a METS URL alone exposes 35 input files."""
     mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')
     processor = run_processor(DummyProcessor,
                               resolver=self.resolver,
                               mets_url=mets_url)
     input_file_count = len(processor.input_files)
     self.assertEqual(input_file_count, 35)