Пример #1
0
def check_options_optimizing(options):
    if options.optimize >= 2:
        check_external_program(
            program='pngquant',
            package='pngquant',
            version_checker=pngquant.version,
            need_version='2.0.1',
            required_for='--optimize {2,3}',
        )

    if options.optimize >= 2:
        # Although we use JBIG2 for optimize=1, don't nag about it unless the
        # user is asking for more optimization
        check_external_program(
            program='jbig2',
            package='jbig2enc',
            version_checker=jbig2enc.version,
            need_version='0.28',
            required_for='--optimize {2,3} | --jbig2-lossy',
            recommended=True if not options.jbig2_lossy else False,
        )

    if options.optimize == 0 and any(
        [options.jbig2_lossy, options.png_quality, options.jpeg_quality]):
        log.warning(
            "The arguments --jbig2-lossy, --png-quality, and --jpeg-quality "
            "will be ignored because --optimize=0.")
Пример #2
0
def check_options(options):
    gs_version = ghostscript.version()
    check_external_program(
        program='gs',
        package='ghostscript',
        version_checker=gs_version,
        need_version='9.15',  # limited by Travis CI / Ubuntu 14.04 backports
    )
    if gs_version in ('9.24', '9.51'):
        raise MissingDependencyError(
            f"Ghostscript {gs_version} contains serious regressions and is not "
            "supported. Please upgrade to a newer version, or downgrade to the "
            "previous version.")

    # We have these constraints to check for.
    # 1. Ghostscript < 9.20 mangles multibyte Unicode
    # 2. hocr doesn't work on non-Latin languages (so don't select it)
    is_latin = options.languages.issubset(HOCR_OK_LANGS)
    if gs_version < '9.20' and options.output_type != 'pdf' and not is_latin:
        # https://bugs.ghostscript.com/show_bug.cgi?id=696874
        # Ghostscript < 9.20 fails to encode multibyte characters properly
        log.warning(
            f"The installed version of Ghostscript ({gs_version}) does not work "
            "correctly with the OCR languages you specified. Use --output-type pdf or "
            "upgrade to Ghostscript 9.20 or later to avoid this issue.")

    if options.output_type == 'pdfa':
        options.output_type = 'pdfa-2'

    if options.output_type == 'pdfa-3' and ghostscript.version() < '9.19':
        raise MissingDependencyError(
            "--output-type pdfa-3 requires Ghostscript 9.19 or later")
Пример #3
0
def check_options(options):
    check_external_program(
        program='tesseract',
        package={'linux': 'tesseract-ocr'},
        version_checker=tesseract.version,
        need_version='4.0.0',  # using backport for Travis CI
    )

    # Decide on what renderer to use
    if options.pdf_renderer == 'auto':
        options.pdf_renderer = 'sandwich'

    if options.pdf_renderer == 'sandwich' and not tesseract.has_textonly_pdf(
            set(options.languages)):
        raise MissingDependencyError(
            "You are using an alpha version of Tesseract 4.0 that does not support "
            "the textonly_pdf parameter. We don't support versions this old.")
    if not tesseract.has_user_words() and (options.user_words
                                           or options.user_patterns):
        log.warning(
            "Tesseract 4.0 ignores --user-words and --user-patterns, so these "
            "arguments have no effect.")
    if options.tesseract_pagesegmode in (0, 2):
        log.warning(
            "The --tesseract-pagesegmode argument you select will disable OCR. "
            "This may cause processing to fail.")
Пример #4
0
def check_options_preprocessing(options):
    if options.clean_final:
        options.clean = True
    if options.unpaper_args and not options.clean:
        raise BadArgsError("--clean is required for --unpaper-args")
    if options.clean:
        check_external_program(
            program='unpaper',
            package='unpaper',
            version_checker=unpaper.version,
            need_version='6.1',
            required_for=['--clean, --clean-final'],
        )
        try:
            if options.unpaper_args:
                options.unpaper_args = unpaper.validate_custom_args(
                    options.unpaper_args)
        except Exception as e:
            raise BadArgsError("--unpaper-args: " + str(e)) from e
Пример #5
0
def check_options(options):
    check_external_program(
        program='tesseract',
        package={'linux': 'tesseract-ocr'},
        version_checker=tesseract.version,
        need_version='4.0.0-beta.1',  # using backport for Travis CI
        version_parser=tesseract.TesseractVersion,
    )

    # Decide on what renderer to use
    if options.pdf_renderer == 'auto':
        options.pdf_renderer = 'sandwich'

    if not tesseract.has_user_words() and (options.user_words
                                           or options.user_patterns):
        log.warning(
            "Tesseract 4.0 ignores --user-words and --user-patterns, so these "
            "arguments have no effect.")
    if options.tesseract_pagesegmode in (0, 2):
        log.warning(
            "The --tesseract-pagesegmode argument you select will disable OCR. "
            "This may cause processing to fail.")