示例#1
0
 def test_no_pageid_without_mets(self):
     """Reading or assigning ``pageId`` on a file created with ``None`` must raise."""
     expected_msg = ".*has no member 'mets' pointing.*"
     orphan = OcrdFile(None)
     # Getter path
     with self.assertRaisesRegex(Exception, expected_msg):
         print(orphan.pageId)
     # Setter path
     with self.assertRaisesRegex(Exception, expected_msg):
         orphan.pageId = 'foo'
示例#2
0
 def test_loctype(self):
     """Check the expected interplay of ``loctype`` and ``otherloctype``."""
     ocrd_file = OcrdFile(None)

     # Values expected right after construction
     self.assertEqual(ocrd_file.loctype, 'OTHER')
     self.assertEqual(ocrd_file.otherloctype, 'FILE')

     ocrd_file.otherloctype = 'foo'
     self.assertEqual(ocrd_file.otherloctype, 'foo')

     # After switching to a different loctype, otherloctype is expected to be None
     ocrd_file.loctype = 'URN'
     self.assertEqual(ocrd_file.loctype, 'URN')
     self.assertEqual(ocrd_file.otherloctype, None)

     # Assigning otherloctype again is expected to bring loctype back to OTHER
     ocrd_file.otherloctype = 'foo'
     self.assertEqual(ocrd_file.loctype, 'OTHER')
示例#3
0
 def test_ocrd_file_eq(self):
     """Exercise ``==`` and ``!=`` between OcrdFile objects."""
     mets = OcrdMets.empty_mets()

     # A file compared with itself
     foo_1 = mets.add_file('FOO', ID='FOO_1', mimetype='image/tiff')
     self.assertEqual(foo_1 == foo_1, True)
     self.assertEqual(foo_1 != foo_1, False)

     # Two files in the same group with different IDs
     foo_2 = mets.add_file('FOO', ID='FOO_2', mimetype='image/tiff')
     self.assertEqual(foo_1 == foo_2, False)

     # be tolerant of different equivalent mimetypes
     temp_tiff = OcrdFile(None, ID='TEMP_1', mimetype='image/tiff')
     temp_tif = OcrdFile(None, ID='TEMP_1', mimetype='image/tif')
     self.assertEqual(temp_tiff == temp_tif, True)

     # A standalone file versus one added to the METS with the same ID
     temp_in_mets = mets.add_file('TEMP', ID='TEMP_1', mimetype='image/tiff')
     self.assertEqual(temp_tiff == temp_in_mets, True)
示例#4
0
def test_file_group_wo_parent_new_version():
    """Expect the new ValueError message when OcrdFile is built from ``None``."""
    expected_msg = r"Must provide mets:file element this OcrdFile represent"
    with pytest.raises(ValueError, match=expected_msg):
        OcrdFile(None)
示例#5
0
    def _resolve_image_as_pil(self, image_url, coords=None):
        """
        Load the image behind a URL as a PIL image, optionally cropped.

        Args:
            - image_url (string) : URL of the image to resolve
            - coords (list) : Coordinates of the bounding box to cut from the image

        Returns:
            Image or region in image as PIL.Image

        """
        log = getLogger('ocrd.workspace._resolve_image_as_pil')

        # Prefer a file already listed in the METS; otherwise wrap the bare URL.
        matches = self.mets.find_files(url=image_url)
        if matches:
            ocrd_file = matches[0]
        else:
            ocrd_file = OcrdFile(None, url=image_url)
        image_filename = self.download_file(ocrd_file).local_filename

        with pushd_popd(self.directory):
            pil_image = Image.open(image_filename)
            pil_image.load()  # alloc and give up the FD

        # Without coordinates the whole image is the result.
        if coords is None:
            return pil_image

        log.debug("Converting PIL to OpenCV: %s", image_url)
        if pil_image.mode in ('1', 'L'):
            color_conversion = cv2.COLOR_GRAY2BGR
        else:
            color_conversion = cv2.COLOR_RGB2BGR
        pil_as_np_array = np.array(pil_image)
        if pil_image.mode == '1':
            # Mode '1' images get an explicit cast to uint8 before conversion.
            pil_as_np_array = pil_as_np_array.astype('uint8')
        # NOTE(review): the array is converted to BGR order and later handed to
        # Image.fromarray without converting back - confirm callers expect this.
        cv2_image = cv2.cvtColor(pil_as_np_array, color_conversion)

        poly = np.array(coords, np.int32)
        log.debug("Cutting region %s from %s", coords, image_url)
        # Column 0 of the points indexes image columns, column 1 image rows;
        # the cut is the min/max extent along each.
        col_values = poly[:, 0]
        row_values = poly[:, 1]
        row_slice = slice(np.min(row_values), np.max(row_values))
        col_slice = slice(np.min(col_values), np.max(col_values))
        return Image.fromarray(cv2_image[row_slice, col_slice])
示例#6
0
 def test_page_from_file(self):
     """Build a PAGE object from an image file and check its basic metadata."""
     image_file = OcrdFile(
         None,
         mimetype='image/tiff',
         local_filename=SAMPLE_IMG,
         ID='file1',
     )
     self.assertEqual(image_file.mimetype, 'image/tiff')

     pcgts = page_from_file(image_file)
     self.assertEqual(pcgts.pcGtsId, image_file.ID)
     page = pcgts.get_Page()
     self.assertEqual(page.imageWidth, 1457)
示例#7
0
    def download_url(self, url, **kwargs):
        """
        Fetch a URL into the workspace and report where it was stored.

        Args:
            url (string): URL to download to directory
            **kwargs : See :py:mod:`ocrd_models.ocrd_file.OcrdFile`

        Returns:
            The local filename of the downloaded file
        """
        # Wrap the URL in a temporary OcrdFile so download_file can handle it.
        remote_file = OcrdFile(None, url=url, **kwargs)
        downloaded = self.download_file(remote_file)
        return downloaded.local_filename
示例#8
0
    def resolve_image_exif(self, image_url):
        """
        Look up an image by URL and return its EXIF metadata as :class:`OcrdExif`

        Args:
            image_url (string) : URL of image

        Return
            :class:`OcrdExif`
        """
        candidates = self.mets.find_files(url=image_url)
        # The placeholder is always constructed, since it is passed as the
        # default value of next().
        placeholder = OcrdFile(None, url=image_url)
        ocrd_file = next(candidates, placeholder)
        downloaded = self.download_file(ocrd_file)
        with Image.open(downloaded.local_filename) as pil_img:
            return OcrdExif(pil_img)
示例#9
0
    def resolve_image_exif(self, image_url):
        """
        Get the EXIF metadata about an image URL as :class:`OcrdExif`

        The result is memoized per URL in ``self.image_cache['exif']``. The
        file is resolved and downloaded on every call, also on a cache hit.

        Args:
            image_url (string) : URL of image

        Return
            :class:`OcrdExif`
        """
        # Prefer a file already listed in the METS; otherwise wrap the bare URL.
        files = self.mets.find_files(url=image_url)
        f = files[0] if files else OcrdFile(None, url=image_url)
        image_filename = self.download_file(f).local_filename

        exif_cache = self.image_cache['exif']
        if image_url not in exif_cache:
            # FIXME must be in the right directory
            # Open via context manager so the file handle is released as soon
            # as the metadata has been extracted instead of being leaked.
            with Image.open(image_filename) as pil_img:
                exif_cache[image_url] = OcrdExif(pil_img)
        return exif_cache[image_url]
示例#10
0
 def test_page_from_file_unsupported_mimetype(self):
     """An existing file with an unknown mimetype must raise ValueError."""
     with self.assertRaisesRegex(ValueError, "Unsupported mimetype"):
         unsupported = OcrdFile(
             None,
             local_filename=__file__,
             mimetype='foo/bar',
         )
         page_from_file(unsupported)
示例#11
0
 def test_page_from_file_no_existe(self):
     """A local filename that does not exist must raise FileNotFoundError."""
     expected_msg = "File not found: 'no-existe'"
     with self.assertRaisesRegex(FileNotFoundError, expected_msg):
         missing = OcrdFile(
             None,
             local_filename='no-existe',
             mimetype='foo/bar',
         )
         page_from_file(missing)
示例#12
0
 def test_page_from_file_no_local_filename(self):
     """A file without ``local_filename`` must be rejected with ValueError."""
     expected_msg = "input_file must have 'local_filename' property"
     with self.assertRaisesRegex(ValueError, expected_msg):
         nameless = OcrdFile(None, mimetype='image/tiff')
         page_from_file(nameless)
示例#13
0
 def test_page_from_file_page(self):
     """Parse an existing PAGE file and check the recorded image width."""
     page_file = OcrdFile(
         None,
         mimetype=MIMETYPE_PAGE,
         local_filename=SAMPLE_PAGE,
     )
     pcgts = page_from_file(page_file)
     page = pcgts.get_Page()
     self.assertEqual(page.imageWidth, 1457)
示例#14
0
 def test_fileGrp_wo_parent(self):
     """A file created with ``None`` is expected to report the group 'TEMP'."""
     orphan = OcrdFile(None)
     file_group = orphan.fileGrp
     self.assertEqual(file_group, 'TEMP')
示例#15
0
 def test_set_url(self):
     """After several assignments, ``url`` is expected to hold the last value."""
     ocrd_file = OcrdFile(None)
     # Assign None first, then two different URLs in a row
     for new_url in (None, 'http://foo', 'http://bar'):
         ocrd_file.url = new_url
     self.assertEqual(ocrd_file.url, 'http://bar')
示例#16
0
def test_file_group_wo_parent():
    """Expect a ValueError mentioning the missing METS relation."""
    with pytest.raises(ValueError) as exc_info:
        OcrdFile(None)
    message = str(exc_info.value)
    assert "not related to METS" in message
示例#17
0
 def test_extension(self):
     """``extension`` is expected to be the suffix of the local filename."""
     local_path = '/tmp/foo/bar/foo.bar'
     ocrd_file = OcrdFile(None, local_filename=local_path)
     self.assertEqual(ocrd_file.extension, '.bar')
示例#18
0
 def test_basename(self):
     """``basename`` is expected to be the last component of the local filename."""
     local_path = '/tmp/foo/bar/foo.bar'
     ocrd_file = OcrdFile(None, local_filename=local_path)
     self.assertEqual(ocrd_file.basename, 'foo.bar')
示例#19
0
 def test_set_id_none(self):
     """Assigning ``None`` to ``ID`` is expected to leave the previous ID in place."""
     ocrd_file = OcrdFile(None)
     original_id = 'foo12'

     ocrd_file.ID = original_id
     self.assertEqual(ocrd_file.ID, 'foo12')

     # Assigning None afterwards; the ID is expected to be unchanged
     ocrd_file.ID = None
     self.assertEqual(ocrd_file.ID, 'foo12')
示例#20
0
 def test_constructor_url(self):
     """A relative URL passed to the constructor is expected to be both URL and local filename."""
     ocrd_file = OcrdFile(None, url="foo/bar")
     self.assertEqual(ocrd_file.url, 'foo/bar')
     self.assertEqual(ocrd_file.local_filename, 'foo/bar')
示例#21
0
    def _resolve_image_as_pil(self, image_url, coords=None):
        """
        Resolve an image URL to a PIL image.

        Images whose PIL mode starts with ``I`` or ``F`` are reduced to
        8 bit via numpy before anything else is done with them.

        Args:
            - image_url (string) : URL of the image to resolve
            - coords (list) : Coordinates of the bounding box to cut from the image

        Returns:
            Image or region in image as PIL.Image

        """
        log = getLogger('ocrd.workspace._resolve_image_as_pil')
        # Take the first file the METS yields for this URL; the fallback
        # OcrdFile is constructed in any case, as it is the default of next().
        f = next(self.mets.find_files(url=image_url),
                 OcrdFile(None, url=image_url))
        image_filename = self.download_file(f).local_filename

        # Open the image with the workspace directory as working directory.
        with pushd_popd(self.directory):
            pil_image = Image.open(image_filename)
            pil_image.load()  # alloc and give up the FD

        # Pillow does not properly support higher color depths
        # (e.g. 16-bit or 32-bit or floating point grayscale),
        # clipping its dynamic range to the lower 8-bit in
        # many operations (including paste, putalpha, ImageStat...),
        # even including conversion.
        # Cf. Pillow#3011 Pillow#3159 Pillow#3838 (still open in 8.0)
        # So to be on the safe side, we must re-quantize these
        # to 8-bit via numpy (conversion to/from which fortunately
        # seems to work reliably):
        if (pil_image.mode.startswith('I') or pil_image.mode.startswith('F')):
            arr_image = np.array(pil_image)
            if arr_image.dtype.kind == 'i':
                # signed integer is *not* trustworthy in this context
                # (usually a mistake in the array interface)
                log.debug('Casting image "%s" from signed to unsigned',
                          image_url)
                # Assigning to .dtype reinterprets the existing buffer in place;
                # no values are converted.
                arr_image.dtype = np.dtype('u' + arr_image.dtype.name)
            # Deliberately a separate `if` (not `elif`): an array that was just
            # re-labelled as unsigned above continues into this branch.
            if arr_image.dtype.kind == 'u':
                # integer needs to be scaled linearly to 8 bit
                # of course, an image might actually have some lower range
                # (e.g. 10-bit in I;16 or 20-bit in I or 4-bit in L),
                # but that would be guessing anyway, so here don't
                # make assumptions on _scale_, just reduce _precision_
                log.debug('Reducing image "%s" from depth %d bit to 8 bit',
                          image_url, arr_image.dtype.itemsize * 8)
                # Shift right by 8 bits for every byte beyond the first,
                # i.e. keep only the most significant byte.
                arr_image = arr_image >> 8 * (arr_image.dtype.itemsize - 1)
                arr_image = arr_image.astype(np.uint8)
            elif arr_image.dtype.kind == 'f':
                # float needs to be scaled from [0,1.0] to [0,255]
                log.debug('Reducing image "%s" from floating point to 8 bit',
                          image_url)
                arr_image *= 255
                arr_image = arr_image.astype(np.uint8)
            pil_image = Image.fromarray(arr_image)

        # Without coordinates the (possibly re-quantized) full image is returned.
        if coords is None:
            return pil_image

        # FIXME: remove or replace this by (image_from_polygon+) crop_image ...
        log.debug("Converting PIL to OpenCV: %s", image_url)
        color_conversion = cv2.COLOR_GRAY2BGR if pil_image.mode in (
            '1', 'L') else cv2.COLOR_RGB2BGR
        # Mode '1' images get an explicit cast to uint8 before conversion.
        pil_as_np_array = np.array(pil_image).astype(
            'uint8') if pil_image.mode == '1' else np.array(pil_image)
        # NOTE(review): the array is converted to BGR order and later handed to
        # Image.fromarray without converting back - confirm callers expect this.
        cv2_image = cv2.cvtColor(pil_as_np_array, color_conversion)

        poly = np.array(coords, np.int32)
        log.debug("Cutting region %s from %s", coords, image_url)
        # Column 1 of the points selects rows and column 0 selects columns;
        # the cut spans the min/max extent of the points along each.
        region_cut = cv2_image[np.min(poly[:, 1]):np.max(poly[:, 1]),
                               np.min(poly[:, 0]):np.max(poly[:, 0])]
        return Image.fromarray(region_cut)
示例#22
0
 def test_basename_without_extension_tar(self):
     """For a ``.tar.gz`` name, both suffixes are expected to be stripped."""
     tarball_path = '/tmp/foo/bar/foo.tar.gz'
     ocrd_file = OcrdFile(None, local_filename=tarball_path)
     self.assertEqual(ocrd_file.basename_without_extension, 'foo')
示例#23
0
 def test_basename_from_url(self):
     """With only a URL given, ``basename`` is expected to be its last path segment."""
     remote_url = "http://foo.bar/quux"
     ocrd_file = OcrdFile(None, url=remote_url)
     self.assertEqual(ocrd_file.basename, 'quux')
示例#24
0
 def test_page_from_file_unsupported_mimetype(self):
     """An unknown mimetype must raise with an 'Unsupported mimetype' message."""
     with self.assertRaisesRegex(Exception, "Unsupported mimetype"):
         unsupported = OcrdFile(None, mimetype='foo/bar')
         page_from_file(unsupported)