def generate_pdf(input_file, output_pdf, output_text, options):
    """Run ``tesseract.generate_pdf`` using settings taken from *options*.

    The three path arguments are forwarded unchanged. The remaining
    keyword arguments are read from attributes of *options*:
    ``languages``, ``tesseract_oem``, ``tesseract_config``,
    ``tesseract_timeout``, ``tesseract_pagesegmode``, ``user_words`` and
    ``user_patterns``.

    The result of the underlying call is discarded.
    """
    # Map option attributes onto the keyword names the backend expects.
    tesseract_kwargs = {
        'input_file': input_file,
        'output_pdf': output_pdf,
        'output_text': output_text,
        'languages': options.languages,
        'engine_mode': options.tesseract_oem,
        'tessconfig': options.tesseract_config,
        'timeout': options.tesseract_timeout,
        'pagesegmode': options.tesseract_pagesegmode,
        'user_words': options.user_words,
        'user_patterns': options.user_patterns,
    }
    tesseract.generate_pdf(**tesseract_kwargs)
def test_image_too_large_pdf(monkeypatch, resources, outdir):
    """Check the outputs written when the run fails with 'Image too large'.

    ``tesseract.run`` is replaced by a stub that always raises
    ``subprocess.CalledProcessError`` carrying ``b'Image too large'`` as
    its output. After calling ``tesseract.generate_pdf`` the text output
    must read ``'[skipped page]'`` and, except on Windows, the PDF output
    must be a zero-byte file.
    """
    pdf_path = outdir / 'pdf.pdf'
    text_path = outdir / 'txt.txt'

    def raise_image_too_large(args, *, env=None, **kwargs):
        # Stand-in for tesseract.run: fail unconditionally.
        raise subprocess.CalledProcessError(
            1, 'tesseract', output=b'Image too large'
        )

    monkeypatch.setattr(tesseract, 'run', raise_image_too_large)

    tesseract.generate_pdf(
        input_file=resources / 'crom.png',
        output_pdf=pdf_path,
        output_text=text_path,
        languages=['eng'],
        engine_mode=None,
        tessconfig=[],
        timeout=180.0,
        pagesegmode=None,
        user_words=None,
        user_patterns=None,
    )

    assert Path(text_path).read_text() == '[skipped page]'

    on_windows = os.name == 'nt'
    if not on_windows:  # different semantics
        assert Path(pdf_path).stat().st_size == 0