def roundtrip(self, testname, basename, srcf, decompress=False,
              compress=False, repaginate=False):
    """Read ``srcf`` with pdfrw, write it back out and check its MD5.

    The output goes to ``<expected.result_dir>/<testname>/<basename>``.
    Its MD5 hex digest is compared with the entries stored under
    ``"<testname>/<basename>"`` in ``expected.results``.  Whatever the
    outcome, one line (size, result, key, digest) is appended to
    ``hashes.txt`` in ``expected.result_dir``.

    Args:
        testname: Sub-directory of the result directory; also the first
            half of the lookup key.
        basename: Output file name; also the second half of the lookup key.
        srcf: Source PDF handed to ``pdfrw.PdfReader``.
        decompress: Forwarded to ``pdfrw.PdfReader``.
        compress: Forwarded to ``pdfrw.PdfWriter``.
        repaginate: When true, the pages are added one by one with
            ``addpages`` instead of re-using the whole trailer.
    """
    out_dir = os.path.join(expected.result_dir, testname)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    out_path = os.path.join(out_dir, basename)
    log_path = os.path.join(expected.result_dir, 'hashes.txt')
    hashkey = '%s/%s' % (testname, basename)
    digest = '------no-file-generated---------'
    expects = expected.results[hashkey]

    # A deliberately skipped test stops right away.  Anything else is
    # executed even when no expectation is recorded yet, so that there
    # is a result to compare against later.
    outcome = 'fail'
    nbytes = 0
    try:
        if 'skip' in expects:
            outcome = 'skip requested'
            return self.skipTest(outcome)
        if 'xfail' in expects:
            outcome = 'xfail requested'
            return self.fail(outcome)

        already_there = os.path.exists(out_path)
        if expects or not already_there:
            if already_there:
                os.remove(out_path)
            reader = pdfrw.PdfReader(srcf, decompress=decompress,
                                     verbose=False)
            writer = pdfrw.PdfWriter(out_path, compress=compress)
            if repaginate:
                writer.addpages(reader.pages)
            else:
                writer.trailer = reader
            writer.write()

        with open(out_path, 'rb') as stream:
            payload = stream.read()
        nbytes = len(payload)
        if payload:
            digest = hashlib.md5(payload).hexdigest()
        else:
            # An empty output file is useless; do not leave it behind.
            os.remove(out_path)

        if not expects:
            outcome = 'skip'
            self.skipTest('No hash available')
        elif len(expects) == 1:
            (only,) = expects
            self.assertEqual(digest, only)
            outcome = 'pass'
        else:
            self.assertIn(digest, expects)
            outcome = 'pass'
    finally:
        # Runs on pass, failure and skip alike, so every attempt is logged.
        line = '%8d %-20s %s %s\n' % (nbytes, outcome, hashkey, digest)
        with open(log_path, 'ab') as log:
            log.write(convert_store(line))
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
    """Convert image ``f`` with img2pdf and verify the resulting PDF.

    The image bytes are converted with ``img2pdf.convert`` (``nodate=True``)
    and the result is parsed with pdfrw.  The trailer, catalog, page tree,
    the single 115x48 page and the embedded image are then checked.
    Finally the converted document and the reference PDF ``out`` are both
    re-written with pdfrw and the two outputs must be equal.

    Args:
        f: Path of the input image.
        out: Reference PDF handed to ``PdfReader``.
        with_pdfrw: Forwarded unchanged to ``img2pdf.convert``.

    Raises:
        Exception: If a Flate-encoded image has a colour space other than
            DeviceGray, DeviceRGB or DeviceCMYK.
    """
    with open(f, "rb") as inf:
        orig_imgdata = inf.read()
    output = img2pdf.convert(orig_imgdata, nodate=True,
                             with_pdfrw=with_pdfrw)
    from io import StringIO, BytesIO
    from pdfrw import PdfReader, PdfName, PdfWriter
    from pdfrw.py23_diffs import convert_load, convert_store

    def close_quietly(image):
        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
        # the close() method
        try:
            image.close()
        except AttributeError:
            pass

    x = PdfReader(StringIO(convert_load(output)))
    self.assertEqual(sorted(x.keys()),
                     [PdfName.Info, PdfName.Root, PdfName.Size])
    self.assertEqual(x.Size, '7')
    self.assertEqual(x.Info, {})
    self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type])
    self.assertEqual(x.Root.Type, PdfName.Catalog)
    self.assertEqual(sorted(x.Root.Pages.keys()),
                     [PdfName.Count, PdfName.Kids, PdfName.Type])
    self.assertEqual(x.Root.Pages.Count, '1')
    self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
    self.assertEqual(len(x.Root.Pages.Kids), 1)

    page = x.Root.Pages.Kids[0]
    self.assertEqual(sorted(page.keys()),
                     [PdfName.Contents, PdfName.MediaBox, PdfName.Parent,
                      PdfName.Resources, PdfName.Type])
    self.assertEqual(page.MediaBox, ['0', '0', '115', '48'])
    self.assertEqual(page.Parent, x.Root.Pages)
    self.assertEqual(page.Type, PdfName.Page)
    self.assertEqual(page.Resources.keys(), [PdfName.XObject])
    self.assertEqual(page.Resources.XObject.keys(), [PdfName.Im0])
    self.assertEqual(page.Contents.keys(), [PdfName.Length])
    self.assertEqual(page.Contents.Length, str(len(page.Contents.stream)))
    self.assertEqual(page.Contents.stream,
                     "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                     "Do\nQ")

    imgprops = page.Resources.XObject.Im0

    # test if the filter is valid:
    self.assertIn(
        imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                          [PdfName.FlateDecode]])
    # test if the colorspace is valid
    self.assertIn(
        imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                              PdfName.DeviceCMYK])

    orig_img = Image.open(f)
    try:
        # test if the image has correct size
        self.assertEqual(imgprops.Width, str(orig_img.size[0]))
        self.assertEqual(imgprops.Height, str(orig_img.size[1]))

        if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            self.assertEqual(imgprops.stream, convert_load(orig_imgdata))
        elif imgprops.Filter == [PdfName.FlateDecode]:
            # otherwise, the data is flate encoded and has to be equal to
            # the pixel data of the input image
            imgdata = zlib.decompress(convert_store(imgprops.stream))
            colorspace = imgprops.ColorSpace
            if colorspace == PdfName.DeviceGray:
                colorspace = 'L'
            elif colorspace == PdfName.DeviceRGB:
                colorspace = 'RGB'
            elif colorspace == PdfName.DeviceCMYK:
                colorspace = 'CMYK'
            else:
                raise Exception("invalid colorspace")
            im = Image.frombytes(colorspace, (int(imgprops.Width),
                                              int(imgprops.Height)),
                                 imgdata)
            # Compare against a separate (possibly converted) image rather
            # than rebinding orig_img, so the file-backed image opened
            # above is the one that gets closed at the end.
            reference = orig_img
            try:
                if orig_img.mode == '1':
                    reference = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    reference = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), reference.tobytes())
            finally:
                close_quietly(im)
                if reference is not orig_img:
                    close_quietly(reference)

        # now use pdfrw to parse and then write out both pdfs and check
        # the result for equality
        y = PdfReader(out)
        outx = BytesIO()
        outy = BytesIO()
        xwriter = PdfWriter()
        ywriter = PdfWriter()
        xwriter.trailer = x
        ywriter.trailer = y
        xwriter.write(outx)
        ywriter.write(outy)
        self.assertEqual(outx.getvalue(), outy.getvalue())
    finally:
        # Close the input image even when one of the assertions failed.
        close_quietly(orig_img)
def roundtrip(self, testname, basename, srcf, decompress=False,
              compress=False, repaginate=False):
    """Read ``srcf`` with pdfrw, write it back out and check its MD5.

    The output goes to ``<expected.result_dir>/<testname>/<basename>``.
    Its MD5 hex digest is compared with the entries stored under
    ``"<testname>/<basename>"`` in ``expected.results``.  Sources whose
    trailer has a truthy ``Encrypt`` entry are skipped.  Whatever the
    outcome, one line (size, result, key, digest) is appended to
    ``hashes.txt`` in ``expected.result_dir``.

    Args:
        testname: Sub-directory of the result directory; also the first
            half of the lookup key.
        basename: Output file name; also the second half of the lookup key.
        srcf: Source PDF handed to ``pdfrw.PdfReader``.
        decompress: Forwarded to ``pdfrw.PdfReader``.
        compress: Forwarded to ``pdfrw.PdfWriter``.
        repaginate: When true, the pages are added one by one with
            ``addpages`` instead of re-using the whole trailer.
    """
    out_dir = os.path.join(expected.result_dir, testname)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    out_path = os.path.join(out_dir, basename)
    log_path = os.path.join(expected.result_dir, 'hashes.txt')
    hashkey = '%s/%s' % (testname, basename)
    digest = '------no-file-generated---------'
    expects = expected.results[hashkey]

    # A deliberately skipped test stops right away.  Anything else is
    # executed even when no expectation is recorded yet, so that there
    # is a result to compare against later.
    outcome = 'fail'
    nbytes = 0
    try:
        if 'skip' in expects:
            outcome = 'skip requested'
            return self.skipTest(outcome)
        if 'xfail' in expects:
            outcome = 'xfail requested'
            return self.fail(outcome)

        already_there = os.path.exists(out_path)
        if expects or not already_there:
            if already_there:
                os.remove(out_path)
            reader = pdfrw.PdfReader(srcf, decompress=decompress,
                                     verbose=False)
            if reader.Encrypt:
                # Nothing is written for encrypted input; record that in
                # the log line instead of the generic placeholder.
                outcome = 'skip -- encrypt'
                digest = '------skip-encrypt-no-file------'
                return self.skipTest('File encrypted')
            writer = pdfrw.PdfWriter(out_path, compress=compress)
            if repaginate:
                writer.addpages(reader.pages)
            else:
                writer.trailer = reader
            writer.write()

        with open(out_path, 'rb') as stream:
            payload = stream.read()
        nbytes = len(payload)
        if payload:
            digest = hashlib.md5(payload).hexdigest()
        else:
            # An empty output file is useless; do not leave it behind.
            os.remove(out_path)

        if not expects:
            outcome = 'skip'
            self.skipTest('No hash available')
        elif len(expects) == 1:
            (only,) = expects
            self.assertEqual(digest, only)
            outcome = 'pass'
        else:
            self.assertIn(digest, expects)
            outcome = 'pass'
    finally:
        # Runs on pass, failure and skip alike, so every attempt is logged.
        line = '%8d %-20s %s %s\n' % (nbytes, outcome, hashkey, digest)
        with open(log_path, 'ab') as log:
            log.write(convert_store(line))
def decode_bytes(self, decode_this, expected):
    """Assert that ``PdfString(decode_this).to_bytes()`` matches ``expected``.

    ``expected`` is passed through ``convert_store`` before the comparison.
    """
    actual = PdfString(decode_this).to_bytes()
    wanted = convert_store(expected)
    self.assertEqual(actual, wanted)
def do_test(self, params, prev_results=[''], scrub=False):
    """Run one example program in a subprocess and check its output's MD5.

    ``params`` is split on whitespace and each word is translated through
    ``lookup`` (unknown words are kept as they are).  The first word names
    the program, which is expanded with ``prog_dir``; the second is the
    source file.  The command runs under ``sys.executable`` after changing
    into the program's sub-directory of ``dstdir``.  The MD5 hex digest of
    the output file is compared with the entries stored under
    ``"examples/<words joined by _>"`` in ``expected.results``.  Whatever
    the outcome, one line is appended to ``hashfile``.

    Args:
        params: Whitespace-separated command description.
        prev_results: One-element list; slot 0 receives the digest of a
            non-empty output.  NOTE(review): the default list is shared
            between calls, presumably on purpose -- confirm with callers.
        scrub: When true, the program's own output is treated as an
            intermediate file and its pages are re-written with pdfrw into
            ``final.<name>``, which is the file that gets hashed.
    """
    words = params.split()
    hashkey = 'examples/%s' % '_'.join(words)
    argv = [lookup.get(word, word) for word in words]
    script = argv[0]
    argv[0] = prog_dir % script
    srcf = argv[1]
    argv.insert(0, sys.executable)

    rel_dir, script = os.path.split(script)
    workdir = os.path.join(dstdir, rel_dir)
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    os.chdir(workdir)

    produced = '%s.%s' % (script, os.path.basename(srcf))
    # Falsy when scrubbing is off, otherwise the name of the raw output.
    raw_output = scrub and produced
    if raw_output:
        dstf = 'final.%s' % produced
    else:
        dstf = produced
    digest = '------no-file-generated---------'
    expects = expected.results[hashkey]

    # A deliberately skipped test stops right away.  Anything else is
    # executed even when no expectation is recorded yet, so that there
    # is a result to compare against later.
    outcome = 'fail'
    nbytes = 0
    try:
        if 'skip' in expects:
            outcome = 'skip requested'
            return self.skipTest(outcome)
        if 'xfail' in expects:
            outcome = 'xfail requested'
            return self.fail(outcome)

        already_there = os.path.exists(dstf)
        if expects or not already_there:
            if already_there:
                os.remove(dstf)
            if raw_output and os.path.exists(raw_output):
                os.remove(raw_output)
            subprocess.call(argv)
            if raw_output:
                writer = PdfWriter(dstf)
                pages = PdfReader(raw_output).pages
                writer.addpages(pages).write()

        with open(dstf, 'rb') as stream:
            payload = stream.read()
        nbytes = len(payload)
        if payload:
            digest = hashlib.md5(payload).hexdigest()
            # Later parameter strings can refer to this output by digest.
            lookup[digest] = dstf
            prev_results[0] = digest
        else:
            # An empty output file is useless; do not leave it behind.
            os.remove(dstf)

        if not expects:
            outcome = 'skip'
            self.skipTest('No hash available')
        elif len(expects) == 1:
            (only,) = expects
            self.assertEqual(digest, only)
            outcome = 'pass'
        else:
            self.assertIn(digest, expects)
            outcome = 'pass'
    finally:
        # Runs on pass, failure and skip alike, so every attempt is logged.
        line = '%8d %-20s %s %s\n' % (nbytes, outcome, hashkey, digest)
        with open(hashfile, 'ab') as log:
            log.write(convert_store(line))
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
    """Convert image ``f`` with img2pdf and verify every generated page.

    The image bytes are converted with ``img2pdf.convert`` (``nodate=True``)
    and the result is parsed with pdfrw.  The trailer, catalog and page
    tree are checked; then, for each page, the matching frame of the input
    image is selected and the page geometry, content stream and embedded
    image data are compared with it.  Finally the converted document and
    the reference PDF ``out`` are both re-written with pdfrw and compared
    with ``compare_pdf``.

    Args:
        f: Path of the input image (one or two frames are accepted).
        out: Reference PDF handed to ``PdfReader``.
        with_pdfrw: Forwarded unchanged to ``img2pdf.convert``.

    Raises:
        Exception: If a Flate-encoded image without ``DecodeParms`` has a
            colour space other than DeviceGray, DeviceRGB or DeviceCMYK.
    """
    with open(f, "rb") as inf:
        orig_imgdata = inf.read()
    output = img2pdf.convert(orig_imgdata, nodate=True,
                             with_pdfrw=with_pdfrw)
    from pdfrw import PdfReader, PdfName, PdfWriter
    from pdfrw.py23_diffs import convert_load, convert_store

    def format_float(value):
        # Whole numbers print without a fraction, everything else with at
        # most four decimals and trailing zeros removed.
        if int(value) == value:
            return str(int(value))
        else:
            return ("%.4f" % value).rstrip("0")

    def close_quietly(image):
        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
        # the close() method
        try:
            image.close()
        except AttributeError:
            pass

    x = PdfReader(PdfReaderIO(convert_load(output)))
    self.assertEqual(sorted(x.keys()),
                     [PdfName.Info, PdfName.Root, PdfName.Size])
    page_count = x.Root.Pages.Count
    self.assertIn(page_count, ('1', '2'))
    # Branch on the Count token (a string).  Comparing len(Kids), an int,
    # with '1'/'2' is never true and would leave these checks unexecuted.
    if page_count == '1':
        self.assertEqual(x.Size, '7')
        self.assertEqual(len(x.Root.Pages.Kids), 1)
    elif page_count == '2':
        self.assertEqual(x.Size, '10')
        self.assertEqual(len(x.Root.Pages.Kids), 2)
    self.assertEqual(x.Info, {})
    self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type])
    self.assertEqual(x.Root.Type, PdfName.Catalog)
    self.assertEqual(sorted(x.Root.Pages.keys()),
                     [PdfName.Count, PdfName.Kids, PdfName.Type])
    self.assertEqual(x.Root.Pages.Type, PdfName.Pages)

    orig_img = Image.open(f)
    try:
        for pagenum in range(len(x.Root.Pages.Kids)):
            # retrieve the original image frame that this page was
            # generated from
            orig_img.seek(pagenum)
            cur_page = x.Root.Pages.Kids[pagenum]

            ndpi = orig_img.info.get("dpi", (96.0, 96.0))
            # In python3, the returned dpi value for some tiff images will
            # not be an integer but a float. To make the behaviour of
            # img2pdf the same between python2 and python3, we convert
            # that float into an integer by rounding.
            # Search online for the 72.009 dpi problem for more info.
            ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
            imgwidthpx, imgheightpx = orig_img.size
            pagewidth = 72.0*imgwidthpx/ndpi[0]
            pageheight = 72.0*imgheightpx/ndpi[1]

            self.assertEqual(sorted(cur_page.keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources,
                              PdfName.Type])
            self.assertEqual(cur_page.MediaBox,
                             ['0', '0', format_float(pagewidth),
                              format_float(pageheight)])
            self.assertEqual(cur_page.Parent, x.Root.Pages)
            self.assertEqual(cur_page.Type, PdfName.Page)
            self.assertEqual(cur_page.Resources.keys(), [PdfName.XObject])
            self.assertEqual(cur_page.Resources.XObject.keys(),
                             [PdfName.Im0])
            self.assertEqual(cur_page.Contents.keys(), [PdfName.Length])
            self.assertEqual(cur_page.Contents.Length,
                             str(len(cur_page.Contents.stream)))
            self.assertEqual(cur_page.Contents.stream,
                             "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                             "/Im0 Do\nQ" % (pagewidth, pageheight))

            imgprops = cur_page.Resources.XObject.Im0

            # test if the filter is valid:
            self.assertIn(
                imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode,
                                  PdfName.FlateDecode,
                                  [PdfName.CCITTFaxDecode]])

            # test if the image has correct size
            self.assertEqual(imgprops.Width, str(orig_img.size[0]))
            self.assertEqual(imgprops.Height, str(orig_img.size[1]))

            if imgprops.Filter in [PdfName.DCTDecode, PdfName.JPXDecode]:
                # if the input file is a jpeg then it should've been
                # copied verbatim into the PDF
                self.assertEqual(imgprops.stream,
                                 convert_load(orig_imgdata))
            elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                # Wrap the raw stream in a TIFF header so PIL can decode it.
                tiff_header = tiff_header_for_ccitt(
                    int(imgprops.Width), int(imgprops.Height),
                    int(imgprops.Length), 4)
                imgio = BytesIO()
                imgio.write(tiff_header)
                imgio.write(convert_store(imgprops.stream))
                imgio.seek(0)
                im = Image.open(imgio)
                try:
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                finally:
                    close_quietly(im)
            elif imgprops.Filter == PdfName.FlateDecode:
                # otherwise, the data is flate encoded and has to be equal
                # to the pixel data of the input image
                imgdata = zlib.decompress(convert_store(imgprops.stream))
                if imgprops.DecodeParms:
                    if orig_img.format == 'PNG':
                        pngidat, _ = img2pdf.parse_png(orig_imgdata)
                    elif orig_img.format == 'TIFF' \
                            and orig_img.info['compression'] == "group4":
                        offset, length = \
                            img2pdf.ccitt_payload_location_from_pil(
                                orig_img)
                        pngidat = orig_imgdata[offset:offset+length]
                    else:
                        pngbuffer = BytesIO()
                        orig_img.save(pngbuffer, format="png")
                        pngidat, _ = img2pdf.parse_png(
                            pngbuffer.getvalue())
                    self.assertEqual(zlib.decompress(pngidat), imgdata)
                else:
                    colorspace = imgprops.ColorSpace
                    if colorspace == PdfName.DeviceGray:
                        colorspace = 'L'
                    elif colorspace == PdfName.DeviceRGB:
                        colorspace = 'RGB'
                    elif colorspace == PdfName.DeviceCMYK:
                        colorspace = 'CMYK'
                    else:
                        raise Exception("invalid colorspace")
                    im = Image.frombytes(colorspace,
                                         (int(imgprops.Width),
                                          int(imgprops.Height)),
                                         imgdata)
                    # orig_img must stay bound to the multi-frame image
                    # (it is seek()ed on the next iteration), so the
                    # comparison uses a separate reference.  Modes that
                    # need no conversion are compared as they are instead
                    # of being left unchecked.
                    reference = orig_img
                    try:
                        if orig_img.mode == '1':
                            reference = orig_img.convert("L")
                        elif orig_img.mode not in ("RGB", "L", "CMYK",
                                                   "CMYK;I"):
                            reference = orig_img.convert("RGB")
                        self.assertEqual(im.tobytes(),
                                         reference.tobytes())
                    finally:
                        close_quietly(im)
                        if reference is not orig_img:
                            close_quietly(reference)

        # now use pdfrw to parse and then write out both pdfs and check
        # the result for equality
        y = PdfReader(out)
        outx = BytesIO()
        outy = BytesIO()
        xwriter = PdfWriter()
        ywriter = PdfWriter()
        xwriter.trailer = x
        ywriter.trailer = y
        xwriter.write(outx)
        ywriter.write(outy)
        self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()),
                         True)
    finally:
        # Close the input image even when one of the assertions failed.
        close_quietly(orig_img)
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
    """Convert image ``f`` with img2pdf and verify every generated page.

    The image bytes are converted with ``img2pdf.convert`` (``nodate=True``)
    and the result is parsed with pdfrw.  The trailer, catalog and page
    tree are checked; then, for each page, the matching frame of the input
    image is selected and the page geometry, content stream and embedded
    image data are compared with it.  Finally the converted document and
    the reference PDF ``out`` are both re-written with pdfrw and the two
    outputs must be equal.

    Args:
        f: Path of the input image (one or two frames are accepted).
        out: Reference PDF handed to ``PdfReader``.
        with_pdfrw: Forwarded unchanged to ``img2pdf.convert``.

    Raises:
        Exception: If a Flate-encoded image has a colour space other than
            DeviceGray, DeviceRGB or DeviceCMYK.
    """
    with open(f, "rb") as inf:
        orig_imgdata = inf.read()
    output = img2pdf.convert(orig_imgdata, nodate=True,
                             with_pdfrw=with_pdfrw)
    from pdfrw import PdfReader, PdfName, PdfWriter
    from pdfrw.py23_diffs import convert_load, convert_store

    def format_float(value):
        # Whole numbers print without a fraction, everything else with at
        # most four decimals and trailing zeros removed.
        if int(value) == value:
            return str(int(value))
        else:
            return ("%.4f" % value).rstrip("0")

    def close_quietly(image):
        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
        # the close() method
        try:
            image.close()
        except AttributeError:
            pass

    x = PdfReader(PdfReaderIO(convert_load(output)))
    self.assertEqual(sorted(x.keys()),
                     [PdfName.Info, PdfName.Root, PdfName.Size])
    page_count = x.Root.Pages.Count
    self.assertIn(page_count, ('1', '2'))
    # Branch on the Count token (a string).  Comparing len(Kids), an int,
    # with '1'/'2' is never true and would leave these checks unexecuted.
    if page_count == '1':
        self.assertEqual(x.Size, '7')
        self.assertEqual(len(x.Root.Pages.Kids), 1)
    elif page_count == '2':
        self.assertEqual(x.Size, '10')
        self.assertEqual(len(x.Root.Pages.Kids), 2)
    self.assertEqual(x.Info, {})
    self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type])
    self.assertEqual(x.Root.Type, PdfName.Catalog)
    self.assertEqual(sorted(x.Root.Pages.keys()),
                     [PdfName.Count, PdfName.Kids, PdfName.Type])
    self.assertEqual(x.Root.Pages.Type, PdfName.Pages)

    orig_img = Image.open(f)
    try:
        for pagenum in range(len(x.Root.Pages.Kids)):
            # retrieve the original image frame that this page was
            # generated from
            orig_img.seek(pagenum)
            cur_page = x.Root.Pages.Kids[pagenum]

            ndpi = orig_img.info.get("dpi", (96.0, 96.0))
            # In python3, the returned dpi value for some tiff images will
            # not be an integer but a float. To make the behaviour of
            # img2pdf the same between python2 and python3, we convert
            # that float into an integer by rounding.
            # Search online for the 72.009 dpi problem for more info.
            ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
            imgwidthpx, imgheightpx = orig_img.size
            pagewidth = 72.0*imgwidthpx/ndpi[0]
            pageheight = 72.0*imgheightpx/ndpi[1]

            self.assertEqual(sorted(cur_page.keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources,
                              PdfName.Type])
            self.assertEqual(cur_page.MediaBox,
                             ['0', '0', format_float(pagewidth),
                              format_float(pageheight)])
            self.assertEqual(cur_page.Parent, x.Root.Pages)
            self.assertEqual(cur_page.Type, PdfName.Page)
            self.assertEqual(cur_page.Resources.keys(), [PdfName.XObject])
            self.assertEqual(cur_page.Resources.XObject.keys(),
                             [PdfName.Im0])
            self.assertEqual(cur_page.Contents.keys(), [PdfName.Length])
            self.assertEqual(cur_page.Contents.Length,
                             str(len(cur_page.Contents.stream)))
            self.assertEqual(cur_page.Contents.stream,
                             "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                             "/Im0 Do\nQ" % (pagewidth, pageheight))

            imgprops = cur_page.Resources.XObject.Im0

            # test if the filter is valid:
            self.assertIn(
                imgprops.Filter, [[PdfName.DCTDecode],
                                  [PdfName.JPXDecode],
                                  [PdfName.FlateDecode],
                                  [PdfName.CCITTFaxDecode]])
            # test if the colorspace is valid
            self.assertIn(
                imgprops.ColorSpace, [PdfName.DeviceGray,
                                      PdfName.DeviceRGB,
                                      PdfName.DeviceCMYK])

            # test if the image has correct size
            self.assertEqual(imgprops.Width, str(orig_img.size[0]))
            self.assertEqual(imgprops.Height, str(orig_img.size[1]))

            if imgprops.Filter in [[PdfName.DCTDecode],
                                   [PdfName.JPXDecode]]:
                # if the input file is a jpeg then it should've been
                # copied verbatim into the PDF
                self.assertEqual(imgprops.stream,
                                 convert_load(orig_imgdata))
            elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                # Wrap the raw stream in a TIFF header so PIL can decode it.
                tiff_header = tiff_header_for_ccitt(
                    int(imgprops.Width), int(imgprops.Height),
                    int(imgprops.Length), 4)
                imgio = BytesIO()
                imgio.write(tiff_header)
                imgio.write(convert_store(imgprops.stream))
                imgio.seek(0)
                im = Image.open(imgio)
                try:
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                finally:
                    close_quietly(im)
            elif imgprops.Filter == [PdfName.FlateDecode]:
                # otherwise, the data is flate encoded and has to be equal
                # to the pixel data of the input image
                imgdata = zlib.decompress(convert_store(imgprops.stream))
                colorspace = imgprops.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                  int(imgprops.Height)),
                                     imgdata)
                # orig_img must stay bound to the multi-frame image (it is
                # seek()ed on the next iteration), so the comparison uses
                # a separate reference.  Modes that need no conversion are
                # compared as they are instead of being left unchecked.
                reference = orig_img
                try:
                    if orig_img.mode == '1':
                        reference = orig_img.convert("L")
                    elif orig_img.mode not in ("RGB", "L", "CMYK",
                                               "CMYK;I"):
                        reference = orig_img.convert("RGB")
                    self.assertEqual(im.tobytes(), reference.tobytes())
                finally:
                    close_quietly(im)
                    if reference is not orig_img:
                        close_quietly(reference)

        # now use pdfrw to parse and then write out both pdfs and check
        # the result for equality
        y = PdfReader(out)
        outx = BytesIO()
        outy = BytesIO()
        xwriter = PdfWriter()
        ywriter = PdfWriter()
        xwriter.trailer = x
        ywriter.trailer = y
        xwriter.write(outx)
        ywriter.write(outy)
        self.assertEqual(outx.getvalue(), outy.getvalue())
    finally:
        # Close the input image even when one of the assertions failed.
        close_quietly(orig_img)
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
    """Convert image ``f`` with img2pdf and verify the resulting PDF.

    The image bytes are converted with ``img2pdf.convert`` (``nodate=True``)
    and the result is parsed with pdfrw.  The trailer, catalog, page tree,
    the single 115x48 page and the embedded image (JPEG/JPEG2000 copy,
    CCITT fax data or Flate-compressed pixels) are then checked.  Finally
    the converted document and the reference PDF ``out`` are both
    re-written with pdfrw and the two outputs must be equal.

    Args:
        f: Path of the input image.
        out: Reference PDF handed to ``PdfReader``.
        with_pdfrw: Forwarded unchanged to ``img2pdf.convert``.

    Raises:
        Exception: If a Flate-encoded image has a colour space other than
            DeviceGray, DeviceRGB or DeviceCMYK.
    """
    with open(f, "rb") as inf:
        orig_imgdata = inf.read()
    output = img2pdf.convert(orig_imgdata, nodate=True,
                             with_pdfrw=with_pdfrw)
    from io import StringIO, BytesIO
    from pdfrw import PdfReader, PdfName, PdfWriter
    from pdfrw.py23_diffs import convert_load, convert_store

    def close_quietly(image):
        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
        # the close() method
        try:
            image.close()
        except AttributeError:
            pass

    x = PdfReader(StringIO(convert_load(output)))
    self.assertEqual(sorted(x.keys()),
                     [PdfName.Info, PdfName.Root, PdfName.Size])
    self.assertEqual(x.Size, '7')
    self.assertEqual(x.Info, {})
    self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type])
    self.assertEqual(x.Root.Type, PdfName.Catalog)
    self.assertEqual(sorted(x.Root.Pages.keys()),
                     [PdfName.Count, PdfName.Kids, PdfName.Type])
    self.assertEqual(x.Root.Pages.Count, '1')
    self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
    self.assertEqual(len(x.Root.Pages.Kids), 1)

    page = x.Root.Pages.Kids[0]
    self.assertEqual(sorted(page.keys()),
                     [PdfName.Contents, PdfName.MediaBox, PdfName.Parent,
                      PdfName.Resources, PdfName.Type])
    self.assertEqual(page.MediaBox, ['0', '0', '115', '48'])
    self.assertEqual(page.Parent, x.Root.Pages)
    self.assertEqual(page.Type, PdfName.Page)
    self.assertEqual(page.Resources.keys(), [PdfName.XObject])
    self.assertEqual(page.Resources.XObject.keys(), [PdfName.Im0])
    self.assertEqual(page.Contents.keys(), [PdfName.Length])
    self.assertEqual(page.Contents.Length, str(len(page.Contents.stream)))
    self.assertEqual(page.Contents.stream,
                     "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                     "Do\nQ")

    imgprops = page.Resources.XObject.Im0

    # test if the filter is valid:
    self.assertIn(
        imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                          [PdfName.FlateDecode],
                          [PdfName.CCITTFaxDecode]])
    # test if the colorspace is valid
    self.assertIn(
        imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                              PdfName.DeviceCMYK])

    orig_img = Image.open(f)
    try:
        # test if the image has correct size
        self.assertEqual(imgprops.Width, str(orig_img.size[0]))
        self.assertEqual(imgprops.Height, str(orig_img.size[1]))

        if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            self.assertEqual(imgprops.stream, convert_load(orig_imgdata))
        elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
            # Wrap the raw stream in a TIFF header so PIL can decode it.
            tiff_header = tiff_header_for_ccitt(
                int(imgprops.Width), int(imgprops.Height),
                int(imgprops.Length), 4)
            imgio = BytesIO()
            imgio.write(tiff_header)
            imgio.write(convert_store(imgprops.stream))
            imgio.seek(0)
            im = Image.open(imgio)
            try:
                self.assertEqual(im.tobytes(), orig_img.tobytes())
            finally:
                close_quietly(im)
        elif imgprops.Filter == [PdfName.FlateDecode]:
            # otherwise, the data is flate encoded and has to be equal to
            # the pixel data of the input image
            imgdata = zlib.decompress(convert_store(imgprops.stream))
            colorspace = imgprops.ColorSpace
            if colorspace == PdfName.DeviceGray:
                colorspace = 'L'
            elif colorspace == PdfName.DeviceRGB:
                colorspace = 'RGB'
            elif colorspace == PdfName.DeviceCMYK:
                colorspace = 'CMYK'
            else:
                raise Exception("invalid colorspace")
            im = Image.frombytes(colorspace, (int(imgprops.Width),
                                              int(imgprops.Height)),
                                 imgdata)
            # Compare against a separate (possibly converted) image rather
            # than rebinding orig_img, so the file-backed image opened
            # above is the one that gets closed at the end.
            reference = orig_img
            try:
                if orig_img.mode == '1':
                    reference = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    reference = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), reference.tobytes())
            finally:
                close_quietly(im)
                if reference is not orig_img:
                    close_quietly(reference)

        # now use pdfrw to parse and then write out both pdfs and check
        # the result for equality
        y = PdfReader(out)
        outx = BytesIO()
        outy = BytesIO()
        xwriter = PdfWriter()
        ywriter = PdfWriter()
        xwriter.trailer = x
        ywriter.trailer = y
        xwriter.write(outx)
        ywriter.write(outy)
        self.assertEqual(outx.getvalue(), outy.getvalue())
    finally:
        # Close the input image even when one of the assertions failed.
        close_quietly(orig_img)