def check_options_optimizing(options):
    """Check the external tools and flags tied to the --optimize level.

    For ``options.optimize >= 2`` both pngquant and jbig2 are passed to
    ``check_external_program``; jbig2 is flagged as merely recommended unless
    ``options.jbig2_lossy`` is set. For ``options.optimize == 0`` a warning is
    logged if any of the lossy/quality settings was supplied, since they will
    have no effect.
    """
    if options.optimize >= 2:
        check_external_program(
            program='pngquant',
            package='pngquant',
            version_checker=pngquant.version,
            need_version='2.0.1',
            required_for='--optimize {2,3}',
        )
        # Although we use JBIG2 for optimize=1, don't nag about it unless the
        # user is asking for more optimization
        check_external_program(
            program='jbig2',
            package='jbig2enc',
            version_checker=jbig2enc.version,
            need_version='0.28',
            required_for='--optimize {2,3} | --jbig2-lossy',
            recommended=not options.jbig2_lossy,
        )

    if options.optimize != 0:
        return

    # Optimization is off: any of these settings would be silently unused.
    unused_settings = (
        options.jbig2_lossy,
        options.png_quality,
        options.jpeg_quality,
    )
    if any(unused_settings):
        log.warning(
            "The arguments --jbig2-lossy, --png-quality, and --jpeg-quality "
            "will be ignored because --optimize=0.")
def _ghostscript_version_key(version):
    """Convert a dotted version string into a tuple of ints for comparison.

    Only the leading ASCII digits of each dot-separated component are used and
    parsing stops at the first component that has none, so ``'9.53.3'`` becomes
    ``(9, 53, 3)`` and ``'10.0'`` becomes ``(10, 0)``.
    """
    key = []
    for component in str(version).split('.'):
        digits = ''
        for char in component:
            if char not in '0123456789':
                break
            digits += char
        if not digits:
            break
        key.append(int(digits))
    return tuple(key)


def check_options(options):
    """Validate the installed Ghostscript against the requested options.

    Passes the detected version to ``check_external_program`` with a minimum
    of 9.15, rejects the versions 9.24 and 9.51, warns when Ghostscript older
    than 9.20 is combined with OCR languages outside ``HOCR_OK_LANGS`` and an
    output type other than ``pdf``, and rewrites ``options.output_type`` from
    ``'pdfa'`` to ``'pdfa-2'``.

    Raises:
        MissingDependencyError: if Ghostscript is 9.24 or 9.51, or if
            ``--output-type pdfa-3`` is requested with Ghostscript older
            than 9.19.
    """
    gs_version = ghostscript.version()
    check_external_program(
        program='gs',
        package='ghostscript',
        version_checker=gs_version,
        need_version='9.15',  # limited by Travis CI / Ubuntu 14.04 backports
    )
    if gs_version in ('9.24', '9.51'):
        raise MissingDependencyError(
            f"Ghostscript {gs_version} contains serious regressions and is not "
            "supported. Please upgrade to a newer version, or downgrade to the "
            "previous version.")

    # Version thresholds must be compared numerically: as plain strings,
    # '10.0' sorts before '9.20', which would misreport Ghostscript 10.x as
    # being too old.
    gs_version_key = _ghostscript_version_key(gs_version)

    # We have these constraints to check for.
    # 1. Ghostscript < 9.20 mangles multibyte Unicode
    # 2. hocr doesn't work on non-Latin languages (so don't select it)
    is_latin = options.languages.issubset(HOCR_OK_LANGS)

    if gs_version_key < (9, 20) and options.output_type != 'pdf' and not is_latin:
        # https://bugs.ghostscript.com/show_bug.cgi?id=696874
        # Ghostscript < 9.20 fails to encode multibyte characters properly
        log.warning(
            f"The installed version of Ghostscript ({gs_version}) does not work "
            "correctly with the OCR languages you specified. Use --output-type pdf or "
            "upgrade to Ghostscript 9.20 or later to avoid this issue.")

    if options.output_type == 'pdfa':
        options.output_type = 'pdfa-2'

    if options.output_type == 'pdfa-3' and gs_version_key < (9, 19):
        raise MissingDependencyError(
            "--output-type pdfa-3 requires Ghostscript 9.19 or later")
def check_options(options):
    """Validate the Tesseract installation against the requested options.

    Passes Tesseract to ``check_external_program`` with a minimum version of
    4.0.0, resolves the ``'auto'`` PDF renderer to ``'sandwich'``, and logs
    warnings for options that will be ignored or that disable OCR.

    Raises:
        MissingDependencyError: if the sandwich renderer is selected and
            ``tesseract.has_textonly_pdf`` reports no support for the
            requested languages.
    """
    check_external_program(
        program='tesseract',
        package={'linux': 'tesseract-ocr'},
        version_checker=tesseract.version,
        need_version='4.0.0',  # using backport for Travis CI
    )

    # Decide on what renderer to use
    if options.pdf_renderer == 'auto':
        options.pdf_renderer = 'sandwich'

    if options.pdf_renderer == 'sandwich':
        ocr_languages = set(options.languages)
        if not tesseract.has_textonly_pdf(ocr_languages):
            raise MissingDependencyError(
                "You are using an alpha version of Tesseract 4.0 that does not support "
                "the textonly_pdf parameter. We don't support versions this old.")

    if not tesseract.has_user_words():
        if options.user_words or options.user_patterns:
            log.warning(
                "Tesseract 4.0 ignores --user-words and --user-patterns, so these "
                "arguments have no effect.")

    # Page segmentation modes for which the warning below is issued.
    non_ocr_pagesegmodes = (0, 2)
    if options.tesseract_pagesegmode in non_ocr_pagesegmodes:
        log.warning(
            "The --tesseract-pagesegmode argument you select will disable OCR. "
            "This may cause processing to fail.")
def check_options_preprocessing(options):
    """Validate and normalize the unpaper-related preprocessing options.

    Setting ``options.clean_final`` forces ``options.clean`` to True. When
    cleaning is enabled, unpaper is passed to ``check_external_program`` with
    a minimum version of 6.1. Any ``options.unpaper_args`` are replaced by
    the result of ``unpaper.validate_custom_args``.

    Raises:
        BadArgsError: if ``--unpaper-args`` is given without ``--clean``, or
            if validating the custom unpaper arguments raises.
    """
    if options.clean_final:
        # --clean-final implies --clean
        options.clean = True

    if options.unpaper_args and not options.clean:
        raise BadArgsError("--clean is required for --unpaper-args")

    if options.clean:
        check_external_program(
            program='unpaper',
            package='unpaper',
            version_checker=unpaper.version,
            need_version='6.1',
            # A plain string, like the other required_for values in this
            # file, rather than a one-element list wrapping the same text.
            required_for='--clean, --clean-final',
        )

    if options.unpaper_args:
        # Only the validation call is guarded, so that unrelated errors are
        # not relabelled as bad --unpaper-args.
        try:
            options.unpaper_args = unpaper.validate_custom_args(
                options.unpaper_args)
        except Exception as e:
            raise BadArgsError("--unpaper-args: " + str(e)) from e
def check_options(options):
    """Validate the Tesseract installation against the requested options.

    Passes Tesseract to ``check_external_program`` with a minimum version of
    4.0.0-beta.1 (parsed with ``tesseract.TesseractVersion``), resolves the
    ``'auto'`` PDF renderer to ``'sandwich'``, and logs warnings for options
    that will be ignored or that disable OCR.
    """
    check_external_program(
        program='tesseract',
        package={'linux': 'tesseract-ocr'},
        version_checker=tesseract.version,
        need_version='4.0.0-beta.1',  # using backport for Travis CI
        version_parser=tesseract.TesseractVersion,
    )

    # Decide on what renderer to use
    if options.pdf_renderer == 'auto':
        options.pdf_renderer = 'sandwich'

    if not tesseract.has_user_words():
        wants_user_data = options.user_words or options.user_patterns
        if wants_user_data:
            log.warning(
                "Tesseract 4.0 ignores --user-words and --user-patterns, so these "
                "arguments have no effect.")

    # Page segmentation modes for which the warning below is issued.
    non_ocr_pagesegmodes = (0, 2)
    if options.tesseract_pagesegmode in non_ocr_pagesegmodes:
        log.warning(
            "The --tesseract-pagesegmode argument you select will disable OCR. "
            "This may cause processing to fail.")