示例#1
0
def results():
    results = collections.defaultdict(set)
    with open(expectedf, 'rb') as f:
        for line in f:
            line = convert_load(line)
            line = line.split('#', 1)[0].split()
            if not line:
                continue
            key, value = line
            results[key].add(value)
    return results
示例#2
0
def results():
    results = collections.defaultdict(set)
    with open(expectedf, 'rb') as f:
        for line in f:
            line = convert_load(line)
            line = line.split('#', 1)[0].split()
            if not line:
                continue
            key, value = line
            results[key].add(value)
    return results
示例#3
0
文件: __init__.py 项目: xErik/img2pdf
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from io import StringIO, BytesIO
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(StringIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertEqual(x.Size, '7')
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Count, '1')
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            self.assertEqual(len(x.Root.Pages.Kids), 1)
            self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources, PdfName.Type])
            self.assertEqual(x.Root.Pages.Kids[0].MediaBox,
                             ['0', '0', '115', '48'])
            self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages)
            self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page)
            self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(),
                             [PdfName.XObject])
            self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(),
                             [PdfName.Im0])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(),
                             [PdfName.Length])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.Length,
                             str(len(x.Root.Pages.Kids[0].Contents.stream)))
            self.assertEqual(x.Root.Pages.Kids[0].Contents.stream,
                             "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                             "Do\nQ")

            imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0

            # test if the filter is valid:
            self.assertIn(
                imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                  [PdfName.FlateDecode]])
            # test if the colorspace is valid
            self.assertIn(
                imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                                      PdfName.DeviceCMYK])
            # test if the image has correct size
            orig_img = Image.open(f)
            self.assertEqual(imgprops.Width, str(orig_img.size[0]))
            self.assertEqual(imgprops.Height, str(orig_img.size[1]))
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
                self.assertEqual(
                    x.Root.Pages.Kids[0].Resources.XObject.Im0.stream,
                    convert_load(orig_imgdata))
            elif imgprops.Filter == [PdfName.FlateDecode]:
                # otherwise, the data is flate encoded and has to be equal to
                # the pixel data of the input image
                imgdata = zlib.decompress(
                    convert_store(
                        x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
                colorspace = imgprops.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                  int(imgprops.Height)),
                                     imgdata)
                if orig_img.mode == '1':
                    orig_img = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    orig_img = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
                # the close() method
                try:
                    im.close()
                except AttributeError:
                    pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
示例#4
0
文件: setup.py 项目: mdabbagh88/pdfrw
#!/usr/bin/env python

from setuptools import setup
from pdfrw import __version__ as version
from pdfrw.py23_diffs import convert_load

setup(
    name='pdfrw',
    version=version,
    description='PDF file reader/writer library',
    long_description=convert_load(open("README.rst", 'rb').read()),
    author='Patrick Maupin',
    author_email='*****@*****.**',
    platforms='Independent',
    url='https://github.com/pmaupin/pdfrw',
    packages=['pdfrw', 'pdfrw.objects'],
    license='MIT',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',

        'Topic :: Multimedia :: Graphics :: Graphics Conversion',
示例#5
0
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(PdfReaderIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertIn(x.Root.Pages.Count, ('1', '2'))
            if len(x.Root.Pages.Kids) == '1':
                self.assertEqual(x.Size, '7')
                self.assertEqual(len(x.Root.Pages.Kids), 1)
            elif len(x.Root.Pages.Kids) == '2':
                self.assertEqual(x.Size, '10')
                self.assertEqual(len(x.Root.Pages.Kids), 2)
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            orig_img = Image.open(f)
            for pagenum in range(len(x.Root.Pages.Kids)):
                # retrieve the original image frame that this page was
                # generated from
                orig_img.seek(pagenum)
                cur_page = x.Root.Pages.Kids[pagenum]

                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                # In python3, the returned dpi value for some tiff images will
                # not be an integer but a float. To make the behaviour of
                # img2pdf the same between python2 and python3, we convert that
                # float into an integer by rounding.
                # Search online for the 72.009 dpi problem for more info.
                ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                imgwidthpx, imgheightpx = orig_img.size
                pagewidth = 72.0*imgwidthpx/ndpi[0]
                pageheight = 72.0*imgheightpx/ndpi[1]

                def format_float(f):
                    if int(f) == f:
                        return str(int(f))
                    else:
                        return ("%.4f" % f).rstrip("0")

                self.assertEqual(sorted(cur_page.keys()),
                                 [PdfName.Contents, PdfName.MediaBox,
                                  PdfName.Parent, PdfName.Resources,
                                  PdfName.Type])
                self.assertEqual(cur_page.MediaBox,
                                 ['0', '0', format_float(pagewidth),
                                  format_float(pageheight)])
                self.assertEqual(cur_page.Parent, x.Root.Pages)
                self.assertEqual(cur_page.Type, PdfName.Page)
                self.assertEqual(cur_page.Resources.keys(),
                                 [PdfName.XObject])
                self.assertEqual(cur_page.Resources.XObject.keys(),
                                 [PdfName.Im0])
                self.assertEqual(cur_page.Contents.keys(),
                                 [PdfName.Length])
                self.assertEqual(cur_page.Contents.Length,
                                 str(len(cur_page.Contents.stream)))
                self.assertEqual(cur_page.Contents.stream,
                                 "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                                 "/Im0 Do\nQ" % (pagewidth, pageheight))

                imgprops = cur_page.Resources.XObject.Im0

                # test if the filter is valid:
                self.assertIn(
                    imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode,
                                      PdfName.FlateDecode,
                                      [PdfName.CCITTFaxDecode]])

                # test if the image has correct size
                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                # if the input file is a jpeg then it should've been copied
                # verbatim into the PDF
                if imgprops.Filter in [PdfName.DCTDecode,
                                       PdfName.JPXDecode]:
                    self.assertEqual(
                        cur_page.Resources.XObject.Im0.stream,
                        convert_load(orig_imgdata))
                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                    tiff_header = tiff_header_for_ccitt(
                        int(imgprops.Width), int(imgprops.Height),
                        int(imgprops.Length), 4)
                    imgio = BytesIO()
                    imgio.write(tiff_header)
                    imgio.write(convert_store(
                        cur_page.Resources.XObject.Im0.stream))
                    imgio.seek(0)
                    im = Image.open(imgio)
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                    try:
                        im.close()
                    except AttributeError:
                        pass

                elif imgprops.Filter == PdfName.FlateDecode:
                    # otherwise, the data is flate encoded and has to be equal
                    # to the pixel data of the input image
                    imgdata = zlib.decompress(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    if imgprops.DecodeParms:
                        if orig_img.format == 'PNG':
                            pngidat, palette = img2pdf.parse_png(orig_imgdata)
                        elif orig_img.format == 'TIFF' \
                                and orig_img.info['compression'] == "group4":
                            offset, length = \
                                    img2pdf.ccitt_payload_location_from_pil(
                                            orig_img)
                            pngidat = orig_imgdata[offset:offset+length]
                        else:
                            pngbuffer = BytesIO()
                            orig_img.save(pngbuffer, format="png")
                            pngidat, palette = img2pdf.parse_png(
                                    pngbuffer.getvalue())
                        self.assertEqual(zlib.decompress(pngidat), imgdata)
                    else:
                        colorspace = imgprops.ColorSpace
                        if colorspace == PdfName.DeviceGray:
                            colorspace = 'L'
                        elif colorspace == PdfName.DeviceRGB:
                            colorspace = 'RGB'
                        elif colorspace == PdfName.DeviceCMYK:
                            colorspace = 'CMYK'
                        else:
                            raise Exception("invalid colorspace")
                        im = Image.frombytes(colorspace,
                                             (int(imgprops.Width),
                                              int(imgprops.Height)),
                                             imgdata)
                        if orig_img.mode == '1':
                            self.assertEqual(im.tobytes(),
                                             orig_img.convert("L").tobytes())
                        elif orig_img.mode not in ("RGB", "L", "CMYK",
                                                   "CMYK;I"):
                            self.assertEqual(im.tobytes(),
                                             orig_img.convert("RGB").tobytes())
                        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does
                        # not have the close() method
                        try:
                            im.close()
                        except AttributeError:
                            pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True)
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
示例#6
0
文件: setup.py 项目: wzugang/pdfrw
#!/usr/bin/env python

from setuptools import setup
from pdfrw import __version__ as version
from pdfrw.py23_diffs import convert_load

setup(
    name='pdfrw',
    version=version,
    description='PDF file reader/writer library',
    long_description=convert_load(open("README.rst", 'rb').read()),
    author='Patrick Maupin',
    author_email='*****@*****.**',
    platforms='Independent',
    url='https://github.com/pmaupin/pdfrw',
    packages=['pdfrw', 'pdfrw.objects'],
    license='MIT',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Topic :: Multimedia :: Graphics :: Graphics Conversion',
示例#7
0
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(PdfReaderIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertIn(x.Root.Pages.Count, ('1', '2'))
            if len(x.Root.Pages.Kids) == '1':
                self.assertEqual(x.Size, '7')
                self.assertEqual(len(x.Root.Pages.Kids), 1)
            elif len(x.Root.Pages.Kids) == '2':
                self.assertEqual(x.Size, '10')
                self.assertEqual(len(x.Root.Pages.Kids), 2)
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            orig_img = Image.open(f)
            for pagenum in range(len(x.Root.Pages.Kids)):
                # retrieve the original image frame that this page was
                # generated from
                orig_img.seek(pagenum)
                cur_page = x.Root.Pages.Kids[pagenum]

                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                # In python3, the returned dpi value for some tiff images will
                # not be an integer but a float. To make the behaviour of
                # img2pdf the same between python2 and python3, we convert that
                # float into an integer by rounding.
                # Search online for the 72.009 dpi problem for more info.
                ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                imgwidthpx, imgheightpx = orig_img.size
                pagewidth = 72.0*imgwidthpx/ndpi[0]
                pageheight = 72.0*imgheightpx/ndpi[1]

                def format_float(f):
                    if int(f) == f:
                        return str(int(f))
                    else:
                        return ("%.4f" % f).rstrip("0")

                self.assertEqual(sorted(cur_page.keys()),
                                 [PdfName.Contents, PdfName.MediaBox,
                                  PdfName.Parent, PdfName.Resources,
                                  PdfName.Type])
                self.assertEqual(cur_page.MediaBox,
                                 ['0', '0', format_float(pagewidth),
                                  format_float(pageheight)])
                self.assertEqual(cur_page.Parent, x.Root.Pages)
                self.assertEqual(cur_page.Type, PdfName.Page)
                self.assertEqual(cur_page.Resources.keys(),
                                 [PdfName.XObject])
                self.assertEqual(cur_page.Resources.XObject.keys(),
                                 [PdfName.Im0])
                self.assertEqual(cur_page.Contents.keys(),
                                 [PdfName.Length])
                self.assertEqual(cur_page.Contents.Length,
                                 str(len(cur_page.Contents.stream)))
                self.assertEqual(cur_page.Contents.stream,
                                 "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                                 "/Im0 Do\nQ" % (pagewidth, pageheight))

                imgprops = cur_page.Resources.XObject.Im0

                # test if the filter is valid:
                self.assertIn(
                    imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                      [PdfName.FlateDecode],
                                      [PdfName.CCITTFaxDecode]])
                # test if the colorspace is valid
                self.assertIn(
                    imgprops.ColorSpace, [PdfName.DeviceGray,
                                          PdfName.DeviceRGB,
                                          PdfName.DeviceCMYK])

                # test if the image has correct size
                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                # if the input file is a jpeg then it should've been copied
                # verbatim into the PDF
                if imgprops.Filter in [[PdfName.DCTDecode],
                                       [PdfName.JPXDecode]]:
                    self.assertEqual(
                        cur_page.Resources.XObject.Im0.stream,
                        convert_load(orig_imgdata))
                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                    tiff_header = tiff_header_for_ccitt(
                        int(imgprops.Width), int(imgprops.Height),
                        int(imgprops.Length), 4)
                    imgio = BytesIO()
                    imgio.write(tiff_header)
                    imgio.write(convert_store(
                        cur_page.Resources.XObject.Im0.stream))
                    imgio.seek(0)
                    im = Image.open(imgio)
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                    try:
                        im.close()
                    except AttributeError:
                        pass

                elif imgprops.Filter == [PdfName.FlateDecode]:
                    # otherwise, the data is flate encoded and has to be equal
                    # to the pixel data of the input image
                    imgdata = zlib.decompress(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    colorspace = imgprops.ColorSpace
                    if colorspace == PdfName.DeviceGray:
                        colorspace = 'L'
                    elif colorspace == PdfName.DeviceRGB:
                        colorspace = 'RGB'
                    elif colorspace == PdfName.DeviceCMYK:
                        colorspace = 'CMYK'
                    else:
                        raise Exception("invalid colorspace")
                    im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                      int(imgprops.Height)),
                                         imgdata)
                    if orig_img.mode == '1':
                        self.assertEqual(im.tobytes(),
                                         orig_img.convert("L").tobytes())
                    elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                        self.assertEqual(im.tobytes(),
                                         orig_img.convert("RGB").tobytes())
                    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not
                    # have the close() method
                    try:
                        im.close()
                    except AttributeError:
                        pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from io import StringIO, BytesIO
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(StringIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertEqual(x.Size, '7')
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Count, '1')
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            self.assertEqual(len(x.Root.Pages.Kids), 1)
            self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources, PdfName.Type])
            self.assertEqual(x.Root.Pages.Kids[0].MediaBox,
                             ['0', '0', '115', '48'])
            self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages)
            self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page)
            self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(),
                             [PdfName.XObject])
            self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(),
                             [PdfName.Im0])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(),
                             [PdfName.Length])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.Length,
                             str(len(x.Root.Pages.Kids[0].Contents.stream)))
            self.assertEqual(x.Root.Pages.Kids[0].Contents.stream,
                             "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                             "Do\nQ")

            imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0

            # test if the filter is valid:
            self.assertIn(
                imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                  [PdfName.FlateDecode],
                                  [PdfName.CCITTFaxDecode]])
            # test if the colorspace is valid
            self.assertIn(
                imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                                      PdfName.DeviceCMYK])
            # test if the image has correct size
            orig_img = Image.open(f)
            self.assertEqual(imgprops.Width, str(orig_img.size[0]))
            self.assertEqual(imgprops.Height, str(orig_img.size[1]))
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
                self.assertEqual(
                    x.Root.Pages.Kids[0].Resources.XObject.Im0.stream,
                    convert_load(orig_imgdata))
            elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                tiff_header = tiff_header_for_ccitt(
                    int(imgprops.Width), int(imgprops.Height),
                    int(imgprops.Length), 4)
                imgio = BytesIO()
                imgio.write(tiff_header)
                imgio.write(convert_store(
                    x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
                imgio.seek(0)
                im = Image.open(imgio)
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                try:
                    im.close()
                except AttributeError:
                    pass

            elif imgprops.Filter == [PdfName.FlateDecode]:
                # otherwise, the data is flate encoded and has to be equal to
                # the pixel data of the input image
                imgdata = zlib.decompress(
                    convert_store(
                        x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
                colorspace = imgprops.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                  int(imgprops.Height)),
                                     imgdata)
                if orig_img.mode == '1':
                    orig_img = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    orig_img = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
                # the close() method
                try:
                    im.close()
                except AttributeError:
                    pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass