Exemplo n.º 1
0
def tess4_available():
    """Check if a tesseract 4 binary is available, even if it's not the
    official "tesseract" on PATH

    While the check runs, ``OCRMYPDF_TESSERACT`` is set to the value returned
    by ``tess4_possible_location()``; afterwards the variable is put back to
    whatever state it had before the call (restored, or removed if it was
    not set).

    Returns the value of ``tesseract.v4() and tesseract.has_textonly_pdf()``.
    """
    var = 'OCRMYPDF_TESSERACT'
    unset = object()  # sentinel: distinguishes "not set" from any real value
    previous = os.environ.get(var, unset)
    try:
        os.environ[var] = tess4_possible_location()
        return tesseract.v4() and tesseract.has_textonly_pdf()
    finally:
        # Restore through the os.environ mapping itself. Rebinding
        # ``os.environ`` to a plain dict copy would leave the variable set in
        # the real process environment (and thus visible to child processes)
        # and would stop later ``os.environ`` writes from reaching the
        # process environment at all.
        if previous is unset:
            os.environ.pop(var, None)
        else:
            os.environ[var] = previous
Exemplo n.º 2
0
def tess4_available():
    """Report whether a Tesseract 4 with text-only PDF support is usable.

    The environment returned by ``ensure_tess4()`` is activated through
    ``modified_os_environ`` and the version/feature probes are run inside it.
    An ``EnvironmentError`` raised anywhere in that process is treated as
    "not available".

    Returns the result of the probes, or ``False`` on ``EnvironmentError``.
    """
    try:
        # ensure_tess4 picks the environment that selects the binary to probe
        candidate_env = ensure_tess4()
        with modified_os_environ(candidate_env):
            # Inside that environment, confirm it really is Tesseract 4 ...
            is_v4 = tesseract.v4()
            if not is_v4:
                return is_v4
            # ... and that it can produce text-only PDFs
            return tesseract.has_textonly_pdf()
    except EnvironmentError:
        pass

    return False
Exemplo n.º 3
0
def tess4_available():
    """Tell whether a Tesseract 4 binary can be used, including one that is
    not the default "tesseract" found on PATH.

    Returns the outcome of ``tesseract.v4() and tesseract.has_textonly_pdf()``
    evaluated under the environment supplied by ``ensure_tess4()``, or
    ``False`` when an ``EnvironmentError`` is raised along the way.
    """
    found = False
    try:
        # Ask ensure_tess4 for the environment pointing at the tess4 binary
        tess4_env = ensure_tess4()
        with modified_os_environ(tess4_env):
            # Probe from within that environment to be sure it is Tess4
            found = tesseract.v4() and tesseract.has_textonly_pdf()
    except EnvironmentError:
        found = False

    return found
Exemplo n.º 4
0
def ensure_tess4():
    """Return an environment dict under which Tesseract 4 is selected.

    If ``tesseract.v4()`` is already true, a copy of the current environment
    is returned unchanged. Otherwise the ``OCRMYPDF_TESS4`` hint variable is
    consulted and, when set, copied into ``OCRMYPDF_TESSERACT`` in the
    returned environment.

    Raises:
        EnvironmentError: Tesseract 4 is not active and no hint is set.
    """
    if tesseract.v4():
        # Nothing to adjust: either "tesseract" on $PATH is v4 already, or
        # OCRMYPDF_TESSERACT already points at a tess4 binary.
        return os.environ.copy()

    if not os.environ.get('OCRMYPDF_TESS4'):
        raise EnvironmentError("Can't find Tesseract 4")

    # OCRMYPDF_TESS4 is only a hint: it says where tess4 lives for the cases
    # that need it, so PATH can keep pointing at tess3 on a machine that has
    # both versions installed.
    tess4_env = os.environ.copy()
    tess4_env['OCRMYPDF_TESSERACT'] = tess4_env['OCRMYPDF_TESS4']
    return tess4_env
Exemplo n.º 5
0
def ensure_tess4():
    """Build the environment needed to run against Tesseract 4.

    Returns a copy of ``os.environ``; when Tesseract 4 is not already the
    active version, ``OCRMYPDF_TESSERACT`` in that copy is set from the
    ``OCRMYPDF_TESS4`` hint variable.

    Raises:
        EnvironmentError: Tesseract 4 is not active and ``OCRMYPDF_TESS4``
            is unset or empty.
    """
    if not tesseract.v4():
        if not os.environ.get('OCRMYPDF_TESS4'):
            raise EnvironmentError("Can't find Tesseract 4")

        # The hint variable names a tess4 binary to use only when required.
        # That lets a system with both versions keep tess3 on PATH while
        # still exercising tess4 through OCRMYPDF_TESS4.
        redirected = os.environ.copy()
        redirected.update(OCRMYPDF_TESSERACT=redirected['OCRMYPDF_TESS4'])
        return redirected

    # Already v4, via "tesseract" on $PATH or via OCRMYPDF_TESSERACT
    return os.environ.copy()
Exemplo n.º 6
0
def _ensure_tess4():
    """Return an environment dict in which "tesseract" resolves to v4.

    If ``tesseract.v4()`` is already true, a plain copy of the current
    environment is returned. Otherwise, when the ``OCRMYPDF_TESS4`` hint is
    set, the directory containing that binary is prepended to ``PATH`` in
    the returned copy.

    Raises:
        AssertionError: ``OCRMYPDF_TESS4`` is set but is not an existing file.
        EnvironmentError: Tesseract 4 is not active and no hint is set.
    """
    if tesseract.v4():
        # "tesseract" on $PATH is already v4
        return os.environ.copy()

    if os.environ.get('OCRMYPDF_TESS4'):
        # OCRMYPDF_TESS4 is a hint environment variable that tells us to look
        # somewhere special for tess4 if and only if we need it. This allows
        # setting OCRMYPDF_TESS4 to test tess4 and PATH to point to tess3
        # on a system with both installed.
        env = os.environ.copy()
        tess4 = Path(os.environ['OCRMYPDF_TESS4'])
        assert tess4.is_file(), "OCRMYPDF_TESS4 does not point to a file"

        # A Path cannot be joined to a str with "+", so convert explicitly.
        # os.pathsep keeps the result valid where the separator is not ":",
        # and an absent/empty PATH must not yield a dangling separator.
        search_path = [str(tess4.parent)]
        if env.get('PATH'):
            search_path.append(env['PATH'])
        env['PATH'] = os.pathsep.join(search_path)
        return env

    raise EnvironmentError("Can't find Tesseract 4")
Exemplo n.º 7
0
def _ensure_tess4():
    """Return an environment dict in which "tesseract" resolves to v4.

    If ``tesseract.v4()`` is already true, a plain copy of the current
    environment is returned. Otherwise, when the ``OCRMYPDF_TESS4`` hint is
    set, the directory containing that binary is prepended to ``PATH`` in
    the returned copy, and the hint itself is carried over.

    Raises:
        AssertionError: ``OCRMYPDF_TESS4`` is set but is not an existing file.
        EnvironmentError: Tesseract 4 is not active and no hint is set.
    """
    if tesseract.v4():
        # "tesseract" on $PATH is already v4
        return os.environ.copy()

    if os.environ.get('OCRMYPDF_TESS4'):
        # OCRMYPDF_TESS4 is a hint environment variable that tells us to look
        # somewhere special for tess4 if and only if we need it. This allows
        # setting OCRMYPDF_TESS4 to test tess4 and PATH to point to tess3
        # on a system with both installed.
        env = os.environ.copy()
        tess4 = Path(os.environ['OCRMYPDF_TESS4'])
        assert tess4.is_file(), "OCRMYPDF_TESS4 does not point to a file"

        # A Path cannot be joined to a str with "+", so convert explicitly.
        # os.pathsep keeps the result valid where the separator is not ":",
        # and an absent/empty PATH must not yield a dangling separator.
        search_path = [str(tess4.parent)]
        if env.get('PATH'):
            search_path.append(env['PATH'])
        env['PATH'] = os.pathsep.join(search_path)

        # The copy above already contains this value; it is re-stated so the
        # hint is explicitly part of the returned environment.
        env['OCRMYPDF_TESS4'] = os.environ['OCRMYPDF_TESS4']
        return env

    raise EnvironmentError("Can't find Tesseract 4")
Exemplo n.º 8
0
def test_tesseract_v4():
    """The Tesseract in use must be reported as version 4."""
    detected_v4 = tesseract.v4()
    assert detected_v4
Exemplo n.º 9
0
def test_tesseract_config_invalid(renderer, resources, outdir):
    """An unparseable --tesseract-config file must be reported as an error.

    Writes a config file containing a single invalid line, runs ocrmypdf
    with it, and checks both the error text and the exit code.
    """
    bad_config = outdir / 'test.cfg'
    with bad_config.open('w') as handle:
        handle.write('THIS FILE IS INVALID\n')

    input_pdf = resources / 'ccitt.pdf'
    output_pdf = outdir / 'out.pdf'
    proc, _, stderr = run_ocrmypdf(
        input_pdf, output_pdf,
        '--pdf-renderer', renderer,
        '--tesseract-config', bad_config)

    # The message check is case-insensitive
    assert "parameter not found" in stderr.lower(), "No error message"
    assert proc.returncode == ExitCode.invalid_config


# Skipped when tesseract.v4() is true (evaluated once, at import time).
@pytest.mark.skipif(tesseract.v4(), reason='arg has no effect in 4.0-beta1')
def test_user_words(resources, outdir):
    """Exercise OCR with a sidecar text file as part of a user-words check.

    NOTE(review): as visible here, ``word_list`` and ``sidecar_after`` are
    assigned but never used, and ``consistent`` is hard-coded to False so the
    only call below never runs. The body looks like it continues beyond what
    is shown -- confirm against the full file.
    """
    # Paths inside the per-test output directory
    word_list = outdir / 'wordlist.txt'
    sidecar_before = outdir / 'sidecar_before.txt'
    sidecar_after = outdir / 'sidecar_after.txt'

    # Don't know how to make this test pass on various versions and platforms
    # so weaken to merely testing that the argument is accepted
    consistent = False

    # Disabled branch: would OCR crom.png and write the sidecar text file
    if consistent:
        check_ocrmypdf(
            resources / 'crom.png', outdir / 'out.pdf',
            '--image-dpi', 150,
            '--sidecar', sidecar_before
        )
Exemplo n.º 10
0
#!/usr/bin/env python3
# © 2017 James R. Barlow: github.com/jbarlow83

import pytest
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.exec import tesseract

# Module-wide marker: every test in this file targets Tesseract 3.x, so the
# whole module is skipped when Tesseract 4 is detected at import time.
pytestmark = pytest.mark.skipif(
    tesseract.v4(),
    reason="tesseract 3.x required",
)


def test_textonly_pdf_on_tess3(resources, no_outpdf):
    """Requesting the 'tess4' PDF renderer must fail with a
    missing-dependency exit code."""
    source_pdf = resources / 'linn.pdf'
    proc, _stdout, _stderr = pytest.helpers.run_ocrmypdf(
        source_pdf, no_outpdf,
        '--pdf-renderer', 'tess4')

    assert proc.returncode == ExitCode.missing_dependency


def test_oem_on_tess3(resources, no_outpdf):
    """Passing --tesseract-oem must still succeed, with a notice on stderr
    that the argument was ignored."""
    source_pdf = resources / 'aspect.pdf'
    proc, _stdout, stderr = pytest.helpers.run_ocrmypdf(
        source_pdf, no_outpdf,
        '--tesseract-oem', '1')

    assert proc.returncode == ExitCode.ok
    assert 'argument ignored' in stderr
Exemplo n.º 11
0
# © 2017 James R. Barlow: github.com/jbarlow83

import pytest
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.exec import tesseract


# Applies to the whole module: these tests need Tesseract 3.x and are
# skipped when tesseract.v4() reports version 4.
pytestmark = pytest.mark.skipif(
    tesseract.v4(),
    reason="tesseract 3.x required",
)


@pytest.mark.skipif(
    tesseract.has_textonly_pdf(),
    reason="check that missing dep is reported on old tess3",
)
def test_textonly_pdf_on_older_tess3(resources, no_outpdf):
    """Without text-only PDF support, the 'sandwich' renderer must exit
    with the missing-dependency code."""
    source_pdf = resources / 'linn.pdf'
    proc, _stdout, _stderr = pytest.helpers.run_ocrmypdf(
        source_pdf, no_outpdf,
        '--pdf-renderer', 'sandwich')

    assert proc.returncode == ExitCode.missing_dependency


@pytest.mark.skipif(
    not tesseract.has_textonly_pdf(),
    reason="check that feature is exercised on new test3",
)
def test_textonly_pdf_on_newer_tess3(resources, no_outpdf):
    """With text-only PDF support present, the 'sandwich' renderer must
    run successfully."""
    source_pdf = resources / 'linn.pdf'
    proc, _stdout, _stderr = pytest.helpers.run_ocrmypdf(
        source_pdf, no_outpdf,
        '--pdf-renderer', 'sandwich')

    assert proc.returncode == ExitCode.ok