def test_get_AllTextLine(self):
    """Check that ``get_AllTextLines`` returns 55 lines for the TEMP1 sample page.

    The fixture is read as raw bytes and passed to ``parseString`` unchanged,
    so the outcome does not depend on the platform's default text encoding.
    """
    page_path = assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml')
    # Binary mode avoids a locale-dependent decode followed by a UTF-8 re-encode.
    with open(page_path, 'rb') as f:
        page = parseString(f.read(), silence=True).get_Page()
    assert len(page.get_AllTextLines()) == 55
def setUp(self):
    """Read the faulty-glyphs PAGE fixture as bytes and parse it.

    Stores the raw content in ``self.xml_as_str`` and the parsed document
    in ``self.pcgts``.
    """
    fixture_path = assets.path_to('glyph-consistency/data/OCR-D-GT-PAGE/FAULTY_GLYPHS.xml')
    with open(fixture_path, 'rb') as handle:
        raw_xml = handle.read()
    self.xml_as_str = raw_xml
    self.pcgts = parseString(raw_xml, silence=True)
# -*- coding: utf-8 -*-

from os.path import join as pjoin
from pathlib import Path
from tempfile import TemporaryDirectory

from tests.base import TestCase, assets, main, copy_of_directory

from ocrd.resolver import Resolver
from ocrd_utils import pushd_popd, initLogging

METS_HEROLD = assets.url_of('SBB0000F29300010000/data/mets.xml')
FOLDER_KANT = assets.path_to('kant_aufklaerung_1784')

# pylint: disable=redundant-unittest-assert, broad-except, deprecated-method, too-many-public-methods


class TestResolver(TestCase):
    """Tests for ``Resolver.workspace_from_url``."""

    def setUp(self):
        """Initialise logging and create a fresh resolver for each test."""
        initLogging()
        self.resolver = Resolver()

    def test_workspace_from_url_bad(self):
        """Passing ``None`` as the METS URL raises with the expected message."""
        expected_message = "Must pass 'mets_url'"
        with self.assertRaisesRegex(Exception, expected_message):
            self.resolver.workspace_from_url(None)

    def test_workspace_from_url_tempdir(self):
        """Create a workspace from a remote METS URL with a custom basename."""
        remote_mets = 'https://raw.githubusercontent.com/OCR-D/assets/master/data/kant_aufklaerung_1784/data/mets.xml'
        self.resolver.workspace_from_url(
            mets_basename='foo.xml',
            mets_url=remote_mets,
        )
def _fixture_kant_complex(tmp_path):
    """Yield a workspace over a copy of the complex Kant sample data.

    The sample directory is copied into ``tmp_path`` before the workspace
    is created on top of it.
    """
    copytree(assets.path_to('kant_aufklaerung_1784-complex/data'), str(tmp_path))
    # Hand over a resolver *instance*: with the bare ``Resolver`` class, any
    # workspace operation that calls a resolver method would fail for lack
    # of a bound ``self``.
    yield Workspace(Resolver(), directory=tmp_path)
def test_str(self):
    """Print the string form of the XML produced from the sample image's EXIF data."""
    image_path = assets.path_to('SBB0000F29300010000/data/OCR-D-IMG/FILE_0001_IMAGE.tif')
    with Image.open(image_path) as img:
        exif = OcrdExif(img)
        xml_text = str(exif.to_xml())
        print(xml_text)
from tests.base import TestCase, main, assets, create_ocrd_file, create_ocrd_file_with_defaults from ocrd_utils import MIMETYPE_PAGE from ocrd_models import OcrdMets from ocrd_modelfactory import (exif_from_filename, page_from_image, page_from_file) SAMPLE_IMG = assets.path_to( 'kant_aufklaerung_1784/data/OCR-D-IMG/INPUT_0017.tif') SAMPLE_PAGE = assets.path_to( 'kant_aufklaerung_1784/data/OCR-D-GT-PAGE/PAGE_0017_PAGE.xml') class TestModelFactory(TestCase): def test_exif_from_filename(self): exif_from_filename(SAMPLE_IMG) with self.assertRaisesRegex( Exception, "Must pass 'image_filename' to 'exif_from_filename'"): exif_from_filename(None) def test_page_from_file(self): f = create_ocrd_file_with_defaults(mimetype='image/tiff', local_filename=SAMPLE_IMG, ID='file1') self.assertEqual(f.mimetype, 'image/tiff') p = page_from_file(f) self.assertEqual(p.pcGtsId, f.ID) self.assertEqual(p.get_Page().imageWidth, 1457) def test_page_from_file_page(self):
def _fixture_sbb_data_tmp(tmp_path):
    """Copy the SBB sample data into ``tmp_path`` and yield that directory as a ``str``."""
    target_dir = str(tmp_path)
    source_dir = assets.path_to('SBB0000F29300010000/data')
    copytree(source_dir, target_dir)
    yield target_dir
def test_validate_filename_off(self):
    """With ``strictness='off'`` the faulty-glyphs page produces no errors."""
    faulty_page = assets.path_to(
        'glyph-consistency/data/OCR-D-GT-PAGE/FAULTY_GLYPHS.xml')
    report = PageValidator.validate(filename=faulty_page, strictness='off')
    error_count = len(report.errors)
    self.assertEqual(error_count, 0, 'no errors')
from tests.base import TestCase, assets, main # pylint: disable=import-error,no-name-in-module from ocrd.resolver import Resolver from ocrd_validators import PageValidator from ocrd_validators.page_validator import get_text, set_text, ConsistencyError from ocrd_models.ocrd_page import parse, TextEquivType from ocrd_utils import pushd_popd FAULTY_GLYPH_PAGE_FILENAME = filename = assets.path_to( 'glyph-consistency/data/OCR-D-GT-PAGE/FAULTY_GLYPHS.xml') class TestPageValidator(TestCase): def setUp(self): pass def test_validate_err(self): with self.assertRaisesRegex( Exception, 'At least one of ocrd_page, ocrd_file or filename must be set' ): PageValidator.validate() with self.assertRaisesRegex( Exception, 'page_textequiv_strategy best not implemented'): PageValidator.validate(filename=FAULTY_GLYPH_PAGE_FILENAME, page_textequiv_strategy='best') # test with deprecated name with self.assertRaisesRegex( Exception, 'page_textequiv_strategy best not implemented'): PageValidator.validate(filename=FAULTY_GLYPH_PAGE_FILENAME, strategy='best') with self.assertRaisesRegex(
def test_resolve_image_exif(self):
    """Resolve EXIF data for a Kant sample image and check compression and width."""
    kant_data_dir = assets.path_to('kant_aufklaerung_1784/data/')
    with pushd_popd(kant_data_dir):
        # The METS path is relative, hence the directory change above.
        workspace = self.resolver.workspace_from_url('mets.xml')
        image_exif = workspace.resolve_image_exif('OCR-D-IMG/INPUT_0017.tif')
        self.assertEqual(image_exif.compression, 'jpeg')
        self.assertEqual(image_exif.width, 1457)
def test_validate_filename(self):
    """Validating the faulty-glyphs page with default settings reports 17 errors."""
    faulty_page = assets.path_to(
        'glyph-consistency/data/OCR-D-GT-PAGE/FAULTY_GLYPHS.xml')
    report = PageValidator.validate(filename=faulty_page)
    error_count = len(report.errors)
    self.assertEqual(error_count, 17, '17 errors')
def test_validate_page(self):
    """``validate page`` on the faulty-glyphs file exits with 1 and reports invalid."""
    faulty_page = assets.path_to(
        'glyph-consistency/data/OCR-D-GT-PAGE/FAULTY_GLYPHS.xml')
    cli_args = ['page', faulty_page]
    result = self.runner.invoke(validate_cli, cli_args)
    self.assertEqual(result.exit_code, 1)
    self.assertIn('<report valid="false">', result.stdout)
def _fixture_sbb(tmp_path):
    """Yield an ``OcrdMets`` loaded from a copy of the SBB sample data under ``tmp_path``."""
    sample_dir = assets.path_to('SBB0000F29300010000/data')
    work_dir = tmp_path / 'SBB_directory'
    shutil.copytree(sample_dir, work_dir)
    yield OcrdMets(filename=str(join(work_dir, 'mets.xml')))
def test_merge(sbb_sample_01):
    """Merging the Kant METS with ``OCR-D-IMG`` mapped to ``FOO`` adds one file group."""
    assert len(sbb_sample_01.file_groups) == 17

    kant_mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
    kant_mets = OcrdMets(filename=kant_mets_path)
    group_mapping = {'OCR-D-IMG': 'FOO'}
    sbb_sample_01.merge(kant_mets, fileGrp_mapping=group_mapping)

    assert len(sbb_sample_01.file_groups) == 18
def test_validate_page(self):
    """``validate page`` on the faulty-glyphs file returns code 1 and an invalid report."""
    faulty_page = assets.path_to(
        'glyph-consistency/data/OCR-D-GT-PAGE/FAULTY_GLYPHS.xml')
    cli_args = ['page', faulty_page]
    exit_code, stdout, _ = self.invoke_cli(validate_cli, cli_args)
    self.assertEqual(exit_code, 1)
    self.assertIn('<report valid="false">', stdout)
import numpy as np
import pytest

from tests.base import (assets, main, FIFOIO)

from ocrd_models import (OcrdFile, OcrdMets)
from ocrd_models.ocrd_page import parseString
from ocrd_models.ocrd_page import TextRegionType, CoordsType, AlternativeImageType
from ocrd_utils import polygon_mask, xywh_from_polygon, bbox_from_polygon, points_from_polygon
from ocrd_modelfactory import page_from_file
from ocrd.resolver import Resolver
from ocrd.workspace import Workspace

TMP_FOLDER = '/tmp/test-core-workspace'
SRC_METS = assets.path_to('kant_aufklaerung_1784/data/mets.xml')

SAMPLE_FILE_FILEGRP = 'OCR-D-IMG'
SAMPLE_FILE_ID = 'INPUT_0017'
SAMPLE_FILE_URL = join(SAMPLE_FILE_FILEGRP, '%s.tif' % SAMPLE_FILE_ID)


def copytree(src, dst, *args, **kwargs):
    """Replace ``dst`` with a copy of ``src``.

    ``dst`` is removed with ``rmtree`` first; ``copytree_`` then receives
    ``src``, ``dst`` and any extra positional and keyword arguments.
    """
    rmtree(dst)
    copytree_(src, dst, *args, **kwargs)


def count_files(d):
    """Return the total number of files found while walking ``d``."""
    total = 0
    for _dirpath, _dirnames, filenames in walk(d):
        total += len(filenames)
    return total
def test_remove_page_after_remove_file(self):
    """After removing ``FILE_0005_IMAGE``, ``PHYS_0005`` is no longer a physical page."""
    sbb_data = assets.path_to('SBB0000F29300010000/data')
    with copy_of_directory(sbb_data) as tempdir:
        mets_path = join(tempdir, 'mets.xml')
        mets = OcrdMets(filename=mets_path)

        pages_before = ['PHYS_0001', 'PHYS_0002', 'PHYS_0005']
        self.assertEqual(mets.physical_pages, pages_before)

        mets.remove_one_file('FILE_0005_IMAGE')

        pages_after = ['PHYS_0001', 'PHYS_0002']
        self.assertEqual(mets.physical_pages, pages_after)
def test_remove_file_group_rmdir(self):
    """Recursively removing ``OCR-D-IMG`` also removes its directory on disk."""
    sbb_data = assets.path_to('SBB0000F29300010000/data')
    with copy_of_directory(sbb_data) as tempdir:
        workspace = Workspace(self.resolver, directory=tempdir)
        group_dir = join(tempdir, 'OCR-D-IMG')

        self.assertTrue(exists(group_dir))
        workspace.remove_file_group('OCR-D-IMG', recursive=True)
        self.assertFalse(exists(group_dir))