def convertPDFToTiff(src, dst):
    """Run Ghostscript on ``src`` writing to ``dst``, passing UTF-8 byte arguments.

    The argument list is assembled from module-level settings
    (``ghostExecutable``, ``ghostDevice``, ``ghostCommandOut`` and the
    optional flags guarded by ``ghostPause``, ``ghostDownScale`` and
    ``ghostQuit``). Every argument is encoded to UTF-8 bytes before being
    handed to ``GS``.

    Args:
        src: Path of the input document (``str``).
        dst: Path of the output file (``str``).

    Returns:
        ``True`` if ``GS`` ran and exited without raising, ``False`` otherwise.
        Failures are reported by printing the traceback; nothing is raised.
    """
    try:
        args = [
            ghostExecutable.encode('utf-8'),
            ghostDevice.encode('utf-8'),
        ]
        if ghostPause == 1:
            args.append(ghostCommandNoPause.encode('utf-8'))
        if ghostDownScale == 1:
            # The downscale factor is glued directly onto its switch.
            args.append(
                ghostCommandDsf.encode('utf-8')
                + str(ghostDownScaleFactor).encode('utf-8')
            )
        args.append(ghostCommandOut.encode('utf-8'))
        args.append(dst.encode('utf-8'))
        args.append(src.encode('utf-8'))
        # NOTE(review): the original formatting was lost; both trailing flags
        # are assumed to be guarded by ghostQuit - confirm.
        if ghostQuit == 1:
            args.append(ghostCommandForce.encode('utf-8'))
            args.append(ghostCommandQuit.encode('utf-8'))
        g = GS(*args)
        g.exit()
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print() (that emitted a stray "None").
        traceback.print_exc()
        return False
    return True
def pdf_to_image(input_file, output_file):
    """Render a PDF to a PNG through Ghostscript and open the resulting image.

    The text of the first page is inspected first: if it contains
    "crinacle" (case-insensitive) the file is rejected. The conversion is
    done on ``__tmp.pdf`` / ``__tmp.png`` copies placed next to the input and
    output, because Ghostscript seems unable to work with non-ASCII paths.
    The temporary copies are left in place afterwards.

    Args:
        input_file: Path to the source PDF.
        output_file: Path where the PNG is written.

    Returns:
        The result of ``Image.open(output_file)``.

    Raises:
        ValueError: If the first page mentions "crinacle", or if either path
            is itself one of the temporary file names.
        OSError: If the input cannot be opened or a copy fails.
    """
    input_file = os.path.abspath(input_file)
    output_file = os.path.abspath(output_file)

    # The handle is only needed for text extraction; the context manager
    # guarantees it is closed even when one of the checks below raises.
    with open(input_file, 'rb') as f:
        text = PyPDF2.PdfFileReader(f).getPage(0).extractText()
    if 'crinacle' in text.lower():
        raise ValueError('Measured by Crinacle')

    # Convert to image with ghostscript
    # Using temporary paths with Ghostscript because it seems to be unable to
    # work with non-ascii characters
    tmp_in = os.path.join(os.path.split(input_file)[0], '__tmp.pdf')
    tmp_out = os.path.join(os.path.split(output_file)[0], '__tmp.png')
    if tmp_in == input_file or tmp_out == output_file:
        # Skip tmp files in case it was passed as input
        raise ValueError('tmp file')

    shutil.copy(input_file, tmp_in)
    gs = Ghostscript(
        b'pdf2png',
        b'-dNOPAUSE',
        b'-sDEVICE=png16m',
        b'-dBATCH',
        b'-r600',
        b'-dUseCropBox',
        f'-sOutputFile={tmp_out}'.encode('utf-8'),
        tmp_in.encode('utf-8'),
    )
    gs.exit()
    shutil.copy(tmp_out, output_file)
    print('\nSaved image to "{}"\n'.format(output_file))
    return Image.open(output_file)
def convertPDFToTiff1(src, dst):
    """Run Ghostscript on ``src`` writing to ``dst``, passing plain string arguments.

    The argument list is assembled from module-level settings
    (``ghostExecutable``, ``ghostDevice``, ``ghostCommandOut`` and the
    optional flags guarded by ``ghostPause``, ``ghostDownScale`` and
    ``ghostQuit``). The assembled arguments are echoed to stdout before
    ``GS`` is invoked.

    Args:
        src: Path of the input document.
        dst: Path of the output file.

    Returns:
        ``True`` if ``GS`` ran and exited without raising, ``False`` otherwise.
        Failures are reported by printing the traceback; nothing is raised.
    """
    try:
        args = [ghostExecutable, ghostDevice]
        if ghostPause == 1:
            args.append(ghostCommandNoPause)
        if ghostDownScale == 1:
            # The downscale factor is glued directly onto its switch.
            args.append(ghostCommandDsf + str(ghostDownScaleFactor))
        args.append(ghostCommandOut)
        args.append(dst)
        args.append(src)
        # NOTE(review): the original formatting was lost; both trailing flags
        # are assumed to be guarded by ghostQuit - confirm.
        if ghostQuit == 1:
            args.append(ghostCommandForce)
            args.append(ghostCommandQuit)
        print(*args)
        g = GS(*args)
        g.exit()
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print() (that emitted a stray "None").
        traceback.print_exc()
        return False
    return True
def pdf_to_image(input_file, output_dir):
    """Render a PDF to ``<output_dir>/<name>.png`` with Ghostscript.

    ``name`` is the text found on the first page between the markers
    'All rights reserved.' and '%THD+noise'; it is used verbatim as the
    output file stem. If the text cannot be extracted or either marker is
    missing, a failure message is printed and nothing is converted.

    Args:
        input_file: Path to the source PDF.
        output_dir: Directory that receives the PNG.

    Returns:
        None, both on success and when the name cannot be read.

    Raises:
        OSError: If the input cannot be opened. Errors raised while opening
            the PDF reader or fetching the first page also propagate.
    """
    input_file = os.path.abspath(input_file)
    output_dir = os.path.abspath(output_dir)
    start_marker = 'All rights reserved.'

    # Read headphone model from the PDF. The context manager closes the
    # handle on every path, including the early return below.
    with open(input_file, 'rb') as f:
        pdf = PyPDF2.PdfFileReader(f)
        page = pdf.getPage(0)
        try:
            t = page.extractText()
            start_ind = t.index(start_marker) + len(start_marker)
            end_ind = t.index('%THD+noise')
            name = t[start_ind:end_ind]
            print('Read "{name}" in "{fp}"'.format(name=name, fp=input_file))
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print('Fail to read "{}"'.format(input_file))
            return

    # Convert to image with ghostscript
    output_file_path = '{}.png'.format(os.path.join(output_dir, name))
    Ghostscript(
        b'pdf2png',
        b'-dNOPAUSE',
        b'-sDEVICE=png16m',
        b'-dBATCH',
        b'-r600',
        b'-dUseCropBox',
        '-sOutputFile={}'.format(output_file_path).encode('utf-8'),
        input_file.encode('utf-8'),
    )
    print('\nSaved image to "{}"\n'.format(output_file_path))
def pdf_to_image(input_file, output_file):
    """Render a PDF to a PNG through Ghostscript and open the resulting image.

    The conversion is done on ``__tmp.pdf`` / ``__tmp.png`` copies placed next
    to the input and output, because Ghostscript seems unable to work with
    non-ASCII paths. The temporary copies are left in place afterwards.

    Args:
        input_file: Path to the source PDF.
        output_file: Path where the PNG is written.

    Returns:
        The result of ``Image.open(output_file)``, or ``None`` when either
        path is itself one of the temporary file names (such files are
        skipped).

    Raises:
        OSError: If the input cannot be opened or a copy fails.
    """
    input_file = os.path.abspath(input_file)
    output_file = os.path.abspath(output_file)

    # The content is never read through this handle; opening it only keeps
    # the fail-fast check that the input is readable. Closing it right away
    # avoids leaking the handle on the early return below.
    with open(input_file, 'rb'):
        pass

    # Convert to image with ghostscript
    # Using temporary paths with Ghostscript because it seems to be unable to
    # work with non-ascii characters
    tmp_in = os.path.join(os.path.split(input_file)[0], '__tmp.pdf')
    tmp_out = os.path.join(os.path.split(output_file)[0], '__tmp.png')
    if tmp_in == input_file or tmp_out == output_file:
        return

    shutil.copy(input_file, tmp_in)
    Ghostscript(
        b'pdf2png',
        b'-dNOPAUSE',
        b'-sDEVICE=png16m',
        b'-dBATCH',
        b'-r600',
        b'-dUseCropBox',
        f'-sOutputFile={tmp_out}'.encode('utf-8'),
        tmp_in.encode('utf-8'),
    )
    shutil.copy(tmp_out, output_file)
    print('\nSaved image to "{}"\n'.format(output_file))
    return Image.open(output_file)
def pdf2png(pdf, size):
    """Transform a PDF to a PNG thumbnail.

    The PDF is dumped to a temporary file, its first page is rendered by
    Ghostscript (``png16m`` device, ``-r42``) into a second temporary file,
    and the rendered bytes are handed to ``image2png`` together with ``size``.
    Both temporary files are removed whether or not the conversion succeeds.

    Args:
        pdf: Seekable binary file-like object holding the PDF.
        size: Passed through unchanged to ``image2png``.

    Returns:
        Whatever ``image2png(img, size)`` returns.
    """
    temporary_name = os.path.join(gettempdir(), uuid4().hex)

    # Dump it to a temp file so that we can feed it to ghostscript
    # It would be cool to use gs.run_file, but it almost never works
    real_file = NamedTemporaryFile(delete=False)
    try:
        with real_file:
            pdf.seek(0)
            real_file.write(pdf.read())

        args = [s.encode('utf-8') for s in [
            "", "-sstdout=/dev/null", "-dNOPAUSE", "-dBATCH", "-dSAFER",
            "-dFirstPage=1", "-dLastPage=1", "-dTextAlphaBits=4",
            "-dGraphicsAlphaBits=4", "-sDEVICE=png16m", "-r42",
            "-sOutputFile=%s" % temporary_name,
            "-f%s" % real_file.name]]
        Ghostscript(*args)

        with open(temporary_name, 'rb') as f:
            img = BytesIO(f.read())
    finally:
        # Clean up both temp files even when something above failed; the
        # output file may not exist yet in that case.
        for path in (real_file.name, temporary_name):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
    return image2png(img, size)
def pdf_to_image(input_file, output_file):
    """Render a PDF straight to a PNG with Ghostscript and open the result.

    Args:
        input_file: Path to the source PDF.
        output_file: Path where the PNG is written.

    Returns:
        The result of ``Image.open(output_file)``.

    Raises:
        OSError: If the input cannot be opened.
    """
    input_file = os.path.abspath(input_file)
    output_file = os.path.abspath(output_file)

    # The content is never read through this handle; opening it only keeps
    # the fail-fast check that the input is readable. Closing it right away
    # avoids leaking the handle if the conversion raises.
    with open(input_file, 'rb'):
        pass

    # Convert to image with ghostscript
    Ghostscript(
        b'pdf2png',
        b'-dNOPAUSE',
        b'-sDEVICE=png16m',
        b'-dBATCH',
        b'-r600',
        b'-dUseCropBox',
        '-sOutputFile={}'.format(output_file).encode('utf-8'),
        input_file.encode('utf-8'),
    )
    print('\nSaved image to "{}"\n'.format(output_file))
    return Image.open(output_file)