Пример #1
0
def test_remove_background(spoof_tesseract_noop):
    """Check that ``--remove-background`` yields pure black and pure white.

    The source image ``congress.jpg`` must not already span 0-255 in all
    three channels.  After running with ``--remove-background`` and
    ``--image-dpi 150``, the output PDF is rasterized with the ``png16m``
    device and every channel is expected to span exactly 0-255.

    ``spoof_tesseract_noop`` is forwarded as the ``env`` of the run.
    """
    from PIL import Image

    # Ensure the input image does not contain pure white/black.  The context
    # manager closes the underlying file as soon as the check is done.
    with Image.open(_infile('congress.jpg')) as im:
        assert im.getextrema() != ((0, 255), (0, 255), (0, 255))

    output_pdf = check_ocrmypdf(
        'congress.jpg', 'test_remove_bg.pdf', '--remove-background',
        '--image-dpi', '150',
        env=spoof_tesseract_noop)

    from ocrmypdf.ghostscript import rasterize_pdf
    import logging
    log = logging.getLogger()

    output_png = _outfile('remove_bg.png')

    rasterize_pdf(
        output_pdf,
        output_png,
        xres=100,
        yres=100,
        raster_device='png16m',
        log=log)

    # The output image should contain pure white and black
    with Image.open(output_png) as im:
        assert im.getextrema() == ((0, 255), (0, 255), (0, 255))
Пример #2
0
def test_deskew(spoof_tesseract_noop):
    """Deskew ``skew.pdf`` and verify the rendered result is close to level.

    The output PDF is rasterized to a monochrome PNG and the skew angle
    reported by Leptonica must lie strictly between -0.5 and 0.5.

    ``spoof_tesseract_noop`` is forwarded as the ``env`` of the run.
    """
    # Produce the deskewed PDF
    result_pdf = check_ocrmypdf(
        'skew.pdf',
        'test_deskew.pdf',
        '-d',
        '-v',
        '1',
        env=spoof_tesseract_noop,
    )

    # Render the result back to an image so Leptonica can measure what skew
    # is left in it
    import logging
    from ocrmypdf.ghostscript import rasterize_pdf

    root_log = logging.getLogger()
    rendered_png = _outfile('deskewed.png')
    rasterize_pdf(
        result_pdf,
        rendered_png,
        xres=150,
        yres=150,
        raster_device='pngmono',
        log=root_log,
    )

    from ocrmypdf.leptonica import Pix

    # Only the angle is checked; the confidence value is not used
    angle, _confidence = Pix.read(rendered_png).find_skew()

    print(angle)
    assert abs(angle) < 0.5, "Deskewing failed"
Пример #3
0
def test_remove_background(spoof_tesseract_noop):
    """Check that ``--remove-background`` yields pure black and pure white.

    The source image ``congress.jpg`` must not already span 0-255 in all
    three channels.  After running with ``--remove-background`` and
    ``--image-dpi 150``, the output PDF is rasterized with the ``png16m``
    device and every channel is expected to span exactly 0-255.

    ``spoof_tesseract_noop`` is forwarded as the ``env`` of the run.
    """
    from PIL import Image

    # Ensure the input image does not contain pure white/black.  The context
    # manager closes the underlying file as soon as the check is done.
    with Image.open(_infile('congress.jpg')) as im:
        assert im.getextrema() != ((0, 255), (0, 255), (0, 255))

    output_pdf = check_ocrmypdf('congress.jpg',
                                'test_remove_bg.pdf',
                                '--remove-background',
                                '--image-dpi',
                                '150',
                                env=spoof_tesseract_noop)

    from ocrmypdf.ghostscript import rasterize_pdf
    import logging
    log = logging.getLogger()

    output_png = _outfile('remove_bg.png')

    rasterize_pdf(output_pdf,
                  output_png,
                  xres=100,
                  yres=100,
                  raster_device='png16m',
                  log=log)

    # The output image should contain pure white and black
    with Image.open(output_png) as im:
        assert im.getextrema() == ((0, 255), (0, 255), (0, 255))
Пример #4
0
def test_deskew(spoof_tesseract_noop):
    """Run the deskew option on ``skew.pdf`` and check the residual skew.

    The resulting PDF is rendered with the ``pngmono`` device, and the angle
    Leptonica finds in that image must lie strictly between -0.5 and 0.5.

    ``spoof_tesseract_noop`` is forwarded as the ``env`` of the run.
    """
    # Run with deskew enabled
    cli_args = ('skew.pdf', 'test_deskew.pdf', '-d', '-v', '1')
    straightened_pdf = check_ocrmypdf(*cli_args, env=spoof_tesseract_noop)

    # Rasterize the output again; Leptonica then reports the skew angle of
    # the rendered page, which tells us whether deskewing worked
    from ocrmypdf.ghostscript import rasterize_pdf
    import logging

    render_options = {
        'xres': 150,
        'yres': 150,
        'raster_device': 'pngmono',
        'log': logging.getLogger(),
    }
    straightened_png = _outfile('deskewed.png')
    rasterize_pdf(straightened_pdf, straightened_png, **render_options)

    from ocrmypdf.leptonica import Pix
    page = Pix.read(straightened_png)
    measurement = page.find_skew()
    # The result unpacks into exactly two values: angle and confidence
    residual_angle, _unused_confidence = measurement

    print(residual_angle)
    assert 0.5 > residual_angle > -0.5, "Deskewing failed"
Пример #5
0
def test_deskew():
    """Deskew ``skew.pdf`` and verify the rendered result is close to level.

    The output PDF is rasterized to a monochrome PNG, and the skew angle
    returned by ``pixFindSkew`` must lie strictly between -0.5 and 0.5.
    """
    # Produce the deskewed PDF
    result_pdf = check_ocrmypdf('skew.pdf', 'test_deskew.pdf', '-d')

    # Render the result back to an image so Leptonica can measure what skew
    # is left in it
    import logging
    from ocrmypdf.ghostscript import rasterize_pdf

    root_log = logging.getLogger()
    rendered_png = _make_output('deskewed.png')
    rasterize_pdf(
        result_pdf,
        rendered_png,
        xres=150,
        yres=150,
        raster_device='pngmono',
        log=root_log,
    )

    from ocrmypdf.leptonica import pixDestroy, pixFindSkew, pixRead

    image = pixRead(rendered_png)
    # Only the angle is checked; the confidence value is not used
    angle, _confidence = pixFindSkew(image)
    # Rebind to whatever pixDestroy returns, so the old handle is dropped
    image = pixDestroy(image)

    print(angle)
    assert abs(angle) < 0.5, "Deskewing failed"
Пример #6
0
def test_deskew():
    """Run the deskew option on ``skew.pdf`` and check the residual skew.

    The resulting PDF is rendered with the ``pngmono`` device, and the angle
    ``pixFindSkew`` reports for that image must lie strictly between -0.5
    and 0.5.
    """
    # Run with deskew enabled
    straightened_pdf = check_ocrmypdf('skew.pdf', 'test_deskew.pdf', '-d')

    # Rasterize the output again; Leptonica then reports the skew angle of
    # the rendered page, which tells us whether deskewing worked
    from ocrmypdf.ghostscript import rasterize_pdf
    import logging

    render_options = {
        'xres': 150,
        'yres': 150,
        'raster_device': 'pngmono',
        'log': logging.getLogger(),
    }
    straightened_png = _make_output('deskewed.png')
    rasterize_pdf(straightened_pdf, straightened_png, **render_options)

    from ocrmypdf.leptonica import pixRead, pixDestroy, pixFindSkew
    page = pixRead(straightened_png)
    measurement = pixFindSkew(page)
    # Rebind to whatever pixDestroy returns, so the old handle is dropped
    page = pixDestroy(page)
    # The result unpacks into exactly two values: angle and confidence
    residual_angle, _unused_confidence = measurement

    print(residual_angle)
    assert 0.5 > residual_angle > -0.5, "Deskewing failed"
Пример #7
0
 def rasterize(pdf, pageno, png):
     """Rasterize ``pdf`` into ``png`` unless ``png`` already exists.

     If the target path exists, it is printed and nothing is rendered.
     Otherwise ``ghostscript.rasterize_pdf`` is called at 100x100 resolution
     with the ``pngmono`` device, with ``pageno`` passed through.
     """
     already_rendered = os.path.exists(png)
     if not already_rendered:
         ghostscript.rasterize_pdf(
             pdf,
             png,
             xres=100,
             yres=100,
             raster_device='pngmono',
             log=gslog,
             pageno=pageno,
         )
         return
     # Output is already on disk: report the path and skip the render
     print(png)
Пример #8
0
 def rasterize(pdf, pageno, png):
     """Render ``pdf`` to the file ``png``, skipping output that exists.

     An existing ``png`` path is only printed.  A missing one is produced by
     calling ``ghostscript.rasterize_pdf`` at 100x100 resolution with the
     ``pngmono`` device, with ``pageno`` passed through.
     """
     if os.path.exists(png):
         # Nothing to do: report the file that is already there
         print(png)
     else:
         render_options = dict(
             xres=100,
             yres=100,
             raster_device='pngmono',
             log=gslog,
             pageno=pageno,
         )
         ghostscript.rasterize_pdf(pdf, png, **render_options)