示例#1
0
 def test_overwrite(self):
     """
     Validating a sequence whose output fileGrp already exists in the METS
     is expected to fail, unless ``overwrite=True`` is passed.
     """
     # The same four-step pipeline is validated twice; it is parsed afresh
     # for every call so that each validation gets its own task objects.
     task_specs = [
         "sample-processor -I OCR-D-IMG       -O OCR-D-SEG-BLOCK",
         "sample-processor -I OCR-D-SEG-BLOCK -O OCR-D-SEG-LINE",
         "sample-processor -I OCR-D-SEG-LINE  -O OCR-D-SEG-WORD",
         "sample-processor -I OCR-D-SEG-WORD  -O OCR-D-OCR-TESS",
     ]
     clash_pattern = r"Invalid task sequence input/output file groups: \[\"Output fileGrp\[@USE='OCR-D-SEG-WORD'\] already in METS!\"\]"
     resolver = Resolver()
     with TemporaryDirectory() as tmp_dir:
         ws = resolver.workspace_from_url(
             assets.path_to('kant_aufklaerung_1784/data/mets.xml'),
             dst_dir=tmp_dir)
         # Register a file in OCR-D-SEG-WORD up front, so the output group
         # of the third step is already present in the METS.
         ws.mets.add_file(
             'OCR-D-SEG-WORD',
             url='foo/bar',
             ID='foo',
             pageId='page1',
             mimetype='image/tif')
         # should fail at step 3
         with self.assertRaisesRegex(Exception, clash_pattern):
             validate_tasks(
                 [ProcessorTask.parse(spec) for spec in task_specs], ws)
         # should succeed b/c overwrite
         validate_tasks(
             [ProcessorTask.parse(spec) for spec in task_specs],
             ws,
             overwrite=True)
示例#2
0
    def test_validate_sequence(self):
        """
        Sequence validation is expected to reject input file groups that are
        neither in the METS nor produced by an earlier step.
        """
        resolver = Resolver()
        with TemporaryDirectory() as tmp_dir:
            ws = resolver.workspace_from_url(
                assets.path_to('kant_aufklaerung_1784/data/mets.xml'),
                dst_dir=tmp_dir)
            # Parameter file handed to every task via -p
            json_path = Path(tmp_dir, 'params.json')
            json_path.write_text('{"param1": true}')

            # Case 1: the second step reads FOO, which no earlier step wrote.
            dangling_input_specs = [
                '%s -I OCR-D-IMG -O OUT1 -p %s' %
                (SAMPLE_NAME_REQUIRED_PARAM, json_path),
                '%s -I FOO -O OUT2 -p %s' %
                (SAMPLE_NAME_REQUIRED_PARAM, json_path)
            ]
            dangling_pattern = "Input file group not contained in METS or produced by previous steps: FOO'"
            with self.assertRaisesRegex(Exception, dangling_pattern):
                validate_tasks(
                    [ProcessorTask.parse(spec) for spec in dangling_input_specs],
                    ws)

            # Case 2: the only step reads IN, which the METS does not contain.
            missing_input_specs = [
                '%s -I IN -O OUT1 -p %s' %
                (SAMPLE_NAME_REQUIRED_PARAM, json_path),
            ]
            missing_pattern = "Input fileGrp.@USE='IN'. not in METS!"
            with self.assertRaisesRegex(Exception, missing_pattern):
                validate_tasks(
                    [ProcessorTask.parse(spec) for spec in missing_input_specs],
                    ws)
示例#3
0
 def test_parse_no_out(self):
     """
     A task without ``-O`` is expected to fail validation for
     ``sample-processor`` but to validate for
     ``sample-processor-without-file-grp``.
     """
     missing_output_msg = 'Processor requires output_file_grp but none was provided.'
     needs_output = ProcessorTask.parse('sample-processor -I IN')
     with self.assertRaisesRegex(Exception, missing_output_msg):
         needs_output.validate()
     # this should validate
     no_output_needed = ProcessorTask.parse(
         'sample-processor-without-file-grp -I IN')
     self.assertTrue(no_output_needed.validate())
示例#4
0
文件: validate.py 项目: bertsky/core
def validate_process(tasks, workspace):
    '''
    Validate a sequence of tasks passable to 'ocrd process'

    Every entry of ``tasks`` is parsed with ``ProcessorTask.parse``. When
    ``workspace`` is truthy it is used as the ``directory`` of a
    ``Workspace`` and the parsed tasks are checked together with
    ``validate_tasks``; otherwise each parsed task is validated on its own.
    Each validation result is passed to ``_inform_of_result``.
    '''
    # Parse everything up front: a malformed task description aborts before
    # any validation result is reported.
    parsed_tasks = [ProcessorTask.parse(task_str) for task_str in tasks]
    if not workspace:
        for parsed in parsed_tasks:
            _inform_of_result(parsed.validate())
        return
    target = Workspace(Resolver(), directory=workspace)
    _inform_of_result(validate_tasks(parsed_tasks, target))
示例#5
0
 def test_fail_validate_param(self):
     """
     Validation is expected to reject the parameter file at
     ``self.param_fname`` because of its unexpected 'foo' property.
     """
     unexpected_prop = r"Additional properties are not allowed \('foo' was unexpected\)"
     task_str = 'sample-processor -I IN -O OUT -p %s' % self.param_fname
     parsed = ProcessorTask.parse(task_str)
     with self.assertRaisesRegex(Exception, unexpected_prop):
         parsed.validate()
示例#6
0
 def test_parse_ok(self):
     """
     Parsing a well-formed task string is expected to fill in executable,
     file groups and parameter path, and ``str()`` to give the input back.
     """
     spec = 'sample-processor1 -I IN -O OUT -p /path/to/param.json'
     parsed = ProcessorTask.parse(spec)
     # The expected executable name carries an 'ocrd-' prefix.
     self.assertEqual(parsed.executable, 'ocrd-sample-processor1')
     self.assertEqual(parsed.input_file_grps, ['IN'])
     self.assertEqual(parsed.output_file_grps, ['OUT'])
     self.assertEqual(parsed.parameter_path, '/path/to/param.json')
     # Round trip: the string form equals the original description.
     self.assertEqual(str(parsed), spec)
示例#7
0
 def test_parse_ok(self):
     """
     Parsing a task with a parameter file is expected to load the
     parameters, and ``str()`` to show them as quoted inline JSON.
     """
     spec = 'sample-processor -I IN -O OUT -p %s' % self.param_fname
     parsed = ProcessorTask.parse(spec)
     self.assertEqual(parsed.executable, 'ocrd-sample-processor')
     self.assertEqual(parsed.input_file_grps, ['IN'])
     self.assertEqual(parsed.output_file_grps, ['OUT'])
     self.assertEqual(json.dumps(parsed.parameters), PARAM_JSON)
     # In the string form the file name is replaced by the quoted JSON.
     quoted_json = "'%s'" % PARAM_JSON
     expected_repr = spec.replace(self.param_fname, quoted_json)
     self.assertEqual(str(parsed), expected_repr)
示例#8
0
 def test_parse_implicit_after_validate(self):
     """
     After ``validate()`` the task is expected to still list exactly the
     file groups that were given on the command line.
     """
     spec = '%s -I IN -O OUT -p \'{"param1": true}\'' % SAMPLE_NAME_REQUIRED_PARAM
     parsed = ProcessorTask.parse(spec)
     parsed.validate()
     # TODO uncomment and adapt once OCR-D/spec#121 lands
     # self.assertEqual(task.input_file_grps, ['IN', 'SECOND_IN'])
     # self.assertEqual(task.output_file_grps, ['OUT', 'SECOND_OUT'])
     self.assertEqual(parsed.input_file_grps, ['IN'])
     self.assertEqual(parsed.output_file_grps, ['OUT'])
示例#9
0
 def test_422(self):
     """
     Regression test for OCR-D/core#422.

     A four-step sequence in which every step reads the output group of
     the previous one is expected to validate without raising.
     """
     chained_specs = [
         "sample-processor -I OCR-D-IMG       -O OCR-D-SEG-BLOCK",
         "sample-processor -I OCR-D-SEG-BLOCK -O OCR-D-SEG-LINE",
         "sample-processor -I OCR-D-SEG-LINE  -O OCR-D-SEG-WORD",
         "sample-processor -I OCR-D-SEG-WORD  -O OCR-D-OCR-TESS",
     ]
     resolver = Resolver()
     with TemporaryDirectory() as tmp_dir:
         ws = resolver.workspace_from_url(
             assets.path_to('kant_aufklaerung_1784/data/mets.xml'),
             dst_dir=tmp_dir)
         parsed_tasks = [ProcessorTask.parse(spec) for spec in chained_specs]
         validate_tasks(parsed_tasks, ws)
示例#10
0
 def test_fail_validate_executable(self):
     """
     Validation is expected to fail with 'Executable not found in ' for
     the task 'no-such-processor'.
     """
     not_found_msg = 'Executable not found in '
     unknown = ProcessorTask.parse('no-such-processor -I IN')
     with self.assertRaisesRegex(Exception, not_found_msg):
         unknown.validate()
示例#11
0
 def test_parse_parameter_none(self):
     """
     A task given without ``-p`` is expected to have empty parameters and
     to round-trip through ``str()``.
     """
     spec = 'sample-processor -I IN -O OUT1,OUT2'
     parsed = ProcessorTask.parse(spec)
     self.assertEqual(parsed.parameters, {})
     self.assertEqual(str(parsed), spec)
示例#12
0
 def test_parse_repeated_params(self):
     """
     Combining ``-p <file>`` with ``-P foo 23`` is expected to yield the
     parameters ``{'foo': 23}``.
     """
     spec = 'sample-processor -I IN -O OUT -p %s -P foo 23' % self.param_fname
     parsed = ProcessorTask.parse(spec)
     expected_params = {'foo': 23}
     self.assertEqual(parsed.parameters, expected_params)
示例#13
0
 def test_parse_unknown(self):
     """
     Parsing a task description with the option ``-x`` is expected to
     raise 'Failed parsing task description'.
     """
     malformed_spec = 'sample-processor -x wrong wrong wrong'
     parse_failure_msg = 'Failed parsing task description'
     with self.assertRaisesRegex(Exception, parse_failure_msg):
         ProcessorTask.parse(malformed_spec)
示例#14
0
 def test_required_param(self):
     """
     Validating 'sample-processor-required-param' without parameters is
     expected to report that 'param1' is required.
     """
     missing_param_msg = "'param1' is a required property"
     spec = 'sample-processor-required-param -I IN -O OUT'
     parsed = ProcessorTask.parse(spec)
     with self.assertRaisesRegex(Exception, missing_param_msg):
         parsed.validate()
示例#15
0
 def test_required_param(self):
     """
     Validating the ``SAMPLE_NAME_REQUIRED_PARAM`` processor without
     parameters is expected to report that 'param1' is required.
     """
     missing_param_msg = "'param1' is a required property"
     spec = '%s -I IN -O OUT' % SAMPLE_NAME_REQUIRED_PARAM
     parsed = ProcessorTask.parse(spec)
     with self.assertRaisesRegex(Exception, missing_param_msg):
         parsed.validate()
示例#16
0
 def test_fail_validate_executable(self):
     """
     Validation is expected to fail with 'Executable not found in ' for
     the task 'sample-processor1'.
     """
     not_found_msg = 'Executable not found in '
     spec = 'sample-processor1 -I IN -O OUT -p /tmp'
     parsed = ProcessorTask.parse(spec)
     with self.assertRaisesRegex(Exception, not_found_msg):
         parsed.validate()
示例#17
0
 def test_fail_validate_param(self):
     """
     Validation is expected to fail with 'Parameter file not readable'
     for the parameter path '/path/to/param.json'.
     """
     unreadable_msg = 'Parameter file not readable'
     spec = 'sample-processor1 -I IN -O OUT -p /path/to/param.json'
     parsed = ProcessorTask.parse(spec)
     with self.assertRaisesRegex(Exception, unreadable_msg):
         parsed.validate()
示例#18
0
 def test_parse_no_in(self):
     """
     A task given without ``-I`` is expected to fail validation with
     'must have input file group'.
     """
     no_input_msg = 'must have input file group'
     bare_task = ProcessorTask.parse('sample-processor')
     with self.assertRaisesRegex(Exception, no_input_msg):
         bare_task.validate()
示例#19
0
 def test_fail_validate_param(self):
     """
     Validation is expected to fail with 'Error parsing' for the
     parameter argument '/path/to/param.json'.
     """
     parse_error_msg = 'Error parsing'
     spec = 'sample-processor -I IN -O OUT -p /path/to/param.json'
     parsed = ProcessorTask.parse(spec)
     with self.assertRaisesRegex(Exception, parse_error_msg):
         parsed.validate()